@midscene/core 0.26.7-beta-20250815153024.0 → 0.26.7-beta-20250818024834.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/common.mjs +37 -16
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +2 -2
- package/dist/es/ai-model/llm-planning.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/ai-model/common.js +36 -18
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +1 -4
- package/dist/lib/ai-model/llm-planning.js +1 -1
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/common.d.ts +2 -3
- package/dist/types/ai-model/index.d.ts +0 -1
- package/dist/types/types.d.ts +0 -1
- package/dist/types/yaml.d.ts +6 -5
- package/package.json +3 -3
package/dist/es/ai-model/common.mjs
CHANGED
@@ -13,7 +13,6 @@ var common_AIActionType = /*#__PURE__*/ function(AIActionType) {
     AIActionType[AIActionType["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
     return AIActionType;
 }({});
-const actionSpaceTypePrefix = 'action_space_';
 async function callAiFn(msgs, AIActionTypeValue) {
     const jsonObject = await callToGetJSONObject(msgs, AIActionTypeValue);
     return {
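The callAiFn helper kept as context in this hunk wraps callToGetJSONObject and resolves to a { content, usage } pair. A minimal usage sketch, assuming callAiFn and AIActionType are in scope (both are exported from dist/es/ai-model/common.mjs); the declarations and message contents below are illustrative only, not part of the package:

```ts
// Illustrative declarations for this sketch only; the real implementations live in
// dist/es/ai-model/common.mjs (see the hunk above).
declare const AIActionType: { PLAN: number };
declare function callAiFn(
  msgs: Array<{ role: string; content: unknown }>,
  actionType: number,
): Promise<{ content: unknown; usage?: Record<string, number | undefined> }>;

const msgs = [
  { role: 'system', content: 'You are a UI planning assistant. Reply in JSON.' },
  { role: 'user', content: 'Plan the next step for: click the login button' },
];

// callAiFn forwards the messages to callToGetJSONObject and returns the parsed JSON
// content plus the token-usage info reported by the model service.
const { content, usage } = await callAiFn(msgs, AIActionType.PLAN);
console.log(content, usage);
```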
@@ -170,29 +169,51 @@ async function markupImageForLLM(screenshotBase64, tree, size) {
     });
     return imagePayload;
 }
-function buildYamlFlowFromPlans(plans,
+function buildYamlFlowFromPlans(plans, sleep) {
     const flow = [];
     for (const plan of plans){
         var _plan_locate;
-        const
-        const action = actionSpace.find((action)=>action.name === verb);
-        if (!action) {
-            console.warn(`Cannot convert action ${verb} to yaml flow. Will ignore it.`);
-            continue;
-        }
+        const type = plan.type;
         const locate = null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.prompt;
-
-
-
-
-
-
+        if ('Tap' === type) flow.push({
+            aiTap: locate
+        });
+        else if ('Hover' === type) flow.push({
+            aiHover: locate
+        });
+        else if ('Input' === type) {
+            const param = plan.param;
+            flow.push({
+                aiInput: param.value,
+                locate
+            });
+        } else if ('KeyboardPress' === type) {
+            const param = plan.param;
+            flow.push({
+                aiKeyboardPress: param.value,
+                locate
+            });
+        } else if ('Scroll' === type) {
+            const param = plan.param;
+            flow.push({
+                aiScroll: null,
+                locate,
+                direction: param.direction,
+                scrollType: param.scrollType,
+                distance: param.distance
+            });
+        } else if ('Sleep' === type) {
+            const param = plan.param;
+            flow.push({
+                sleep: param.timeMs
+            });
+        } else 'AndroidBackButton' === type || 'AndroidHomeButton' === type || 'AndroidRecentAppsButton' === type || 'AndroidLongPress' === type || 'AndroidPull' === type || 'Error' === type || 'Assert' === type || 'AssertWithoutThrow' === type || 'Finished' === type || console.warn(`Cannot convert action ${type} to yaml flow. This should be a bug of Midscene.`);
     }
     if (sleep) flow.push({
-        sleep
+        sleep: sleep
     });
     return flow;
 }
-export { common_AIActionType as AIActionType,
+export { common_AIActionType as AIActionType, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptQwenBbox, buildYamlFlowFromPlans, callAiFn, expandSearchArea, fillBboxParam, markupImageForLLM, mergeRects, warnGPT4oSizeLimit };
 
 //# sourceMappingURL=common.mjs.map
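The rewritten buildYamlFlowFromPlans above drops the actionSpace lookup and switches on a hard-coded set of plan types instead. A minimal sketch of the resulting mapping, assuming the function is in scope (it is exported from dist/es/ai-model/common.mjs per this hunk); the declaration and plan objects below are illustrative:

```ts
// Declared here only for the sketch; the real implementation is shown in the hunk above.
declare function buildYamlFlowFromPlans(
  plans: Array<{ type: string; param: unknown; locate?: { prompt: string } | null }>,
  sleep?: number,
): Array<Record<string, unknown>>;

const plans = [
  { type: 'Tap', param: null, locate: { prompt: 'the login button' } },
  { type: 'Input', param: { value: 'midscene' }, locate: { prompt: 'the search box' } },
  { type: 'Sleep', param: { timeMs: 500 }, locate: null },
];

const flow = buildYamlFlowFromPlans(plans, 1000);
// Per the added branches above, `flow` should be:
// [
//   { aiTap: 'the login button' },
//   { aiInput: 'midscene', locate: 'the search box' },
//   { sleep: 500 },
//   { sleep: 1000 },  // the trailing `sleep` argument is appended as a final step
// ]
console.log(flow);
```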
package/dist/es/ai-model/common.mjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"ai-model/common.mjs","sources":["webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n call,\n callToGetJSONObject,\n getModelName,\n} from './service-caller/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nexport const actionSpaceTypePrefix = 'action_space_';\n\nexport async function callAiFn<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const jsonObject = await callToGetJSONObject<T>(msgs, AIActionTypeValue);\n\n return {\n content: jsonObject.content,\n usage: jsonObject.usage,\n };\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? 
Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlLocateMode() === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, 
width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(bbox, width, height);\n const rect = {\n left: left + offsetX,\n top: top + offsetY,\n width: right - left,\n height: bottom - top,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size) {\n if (warned) return;\n if (getModelName()?.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your page to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(rect: Rect, screenSize: Size) {\n const minEdgeSize = vlLocateMode() === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. 
Will ignore it.`,\n );\n continue;\n }\n\n const locate = plan.locate?.prompt;\n const flowKey = action.interfaceAlias || `${actionSpaceTypePrefix}${verb}`;\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: locate || '',\n ...(plan.param || {}),\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n"],"names":["AIActionType","actionSpaceTypePrefix","callAiFn","msgs","AIActionTypeValue","jsonObject","callToGetJSONObject","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","vlLocateMode","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rect","warned","warnGPT4oSizeLimit","size","_getModelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","_plan_locate","verb","action","flowKey","flowItem"],"mappings":";;;;;;;AAkCO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQL,MAAMC,wBAAwB;AAE9B,eAAeC,SACpBC,IAAY,EACZC,iBAA+B;IAE/B,MAAMC,aAAa,MAAMC,oBAAuBH,MAAMC;IAEtD,OAAO;QACL,SAASC,WAAW,OAAO;QAC3B,OAAOA,WAAW,KAAK;IACzB;AACF;AAEA,MAAME,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc;IAGd,IAAKF,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGG,UAAUH,OAAO,IAAI,EAAEC,OAAOC;IAG9C,OAAOF;AACT;AAEO,SAASI,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;QACN,YAAnB,OAAOS,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;KAC1B;IACD,OAAOa;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCJ,KAAa,EACbC,MAAc;IAEdU,OACEX,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOG,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;YAC5CQ,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;SAC7C;QAEH,MAAM,IAAIM,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAA
CF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAIH,IACEc,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;QAEhEc,KAAK,GAAG,CACNT,OACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACNR,QACAQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIS,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAGH,MAAMI,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,IAAIkB,AAAmB,oBAAnBA,kBAAsCA,AAAmB,kBAAnBA,gBACxC,OAAOT,gBAAgBN,MAAMJ,OAAOC;IAGtC,IAAIkB,AAAmB,aAAnBA,gBACF,OAAOC,gBAAgBhB,MAAMJ,OAAOC;IAGtC,OAAOE,cAAcC;AACvB;AAEO,SAASgB,gBACdhB,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC5C,MAAMsB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC5C,MAAMsB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC7C,MAAMwB,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdyB,UAAU,CAAC,EACXC,UAAU,CAAC;IAEX/B,kBAAkB,mBAAmBQ,MAAMJ,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UAAUE,MAAMJ,OAAOC;IAC1D,MAAM2B,OAAO;QACX,MAAMP,OAAOK;QACb,KAAKJ,MAAMK;QACX,OAAOJ,QAAQF;QACf,QAAQG,SAASF;IACnB;IACA1B,kBAAkB,4BAA4BgC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU;QAEvCC;IADJ,IAAIH,QAAQ;IACZ,IAAI,QAAAG,CAAAA,gBAAAA,cAAa,IAAbA,KAAAA,IAAAA,cAAgB,WAAW,GAAG,QAAQ,CAAC,WAAW;QACpD,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,wFAAwF,CAAC;QAEhN,IACEtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBAAiBd,IAAU,EAAEe,UAAgB;IAC3D,MAAMC,cAAczB,AAAmB,oBAAnBA,iBAAqC,MAAM;IAC/D,MAAM0B,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGnB,KAAK,GAAG,CACnBmB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGnB,KAAK,GAAG,CACpBmB,KAA
K,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,WAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAA2B,EAC3BC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;YAWTK;QAVf,MAAMC,OAAOF,KAAK,IAAI;QAEtB,MAAMG,SAASN,YAAY,IAAI,CAAC,CAACM,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXhC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE+B,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMlE,SAAS,QAAAiE,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,MAAM;QAClC,MAAMG,UAAUD,OAAO,cAAc,IAAI,GAAG7E,wBAAwB4E,MAAM;QAE1E,MAAMG,WAAiC;YACrC,CAACD,QAAQ,EAAEpE,UAAU;YACrB,GAAIgE,KAAK,KAAK,IAAI,CAAC,CAAC;QACtB;QAEAD,KAAK,IAAI,CAACM;IACZ;IAEA,IAAIP,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT"}
+
{"version":3,"file":"ai-model/common.mjs","sources":["webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n BaseElement,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n PlanningActionParamInputOrKeyPress,\n PlanningActionParamSleep,\n Rect,\n ScrollParam,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n call,\n callToGetJSONObject,\n getModelName,\n} from './service-caller/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nexport async function callAiFn<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const jsonObject = await callToGetJSONObject<T>(msgs, AIActionTypeValue);\n\n return {\n content: jsonObject.content,\n usage: jsonObject.usage,\n };\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? 
Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlLocateMode() === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, 
width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(bbox, width, height);\n const rect = {\n left: left + offsetX,\n top: top + offsetY,\n width: right - left,\n height: bottom - top,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size) {\n if (warned) return;\n if (getModelName()?.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your page to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(rect: Rect, screenSize: Size) {\n const minEdgeSize = vlLocateMode() === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? 
Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const type = plan.type;\n const locate = plan.locate?.prompt!; // TODO: check if locate is null\n\n if (type === 'Tap') {\n flow.push({\n aiTap: locate!,\n });\n } else if (type === 'Hover') {\n flow.push({\n aiHover: locate!,\n });\n } else if (type === 'Input') {\n const param = plan.param as PlanningActionParamInputOrKeyPress;\n flow.push({\n aiInput: param.value,\n locate,\n });\n } else if (type === 'KeyboardPress') {\n const param = plan.param as PlanningActionParamInputOrKeyPress;\n flow.push({\n aiKeyboardPress: param.value,\n locate,\n });\n } else if (type === 'Scroll') {\n const param = plan.param as ScrollParam;\n flow.push({\n aiScroll: null,\n locate,\n direction: param.direction,\n scrollType: param.scrollType,\n distance: param.distance,\n });\n } else if (type === 'Sleep') {\n const param = plan.param as PlanningActionParamSleep;\n flow.push({\n sleep: param.timeMs,\n });\n } else if (\n type === 'AndroidBackButton' ||\n type === 'AndroidHomeButton' ||\n type === 'AndroidRecentAppsButton' ||\n type === 'AndroidLongPress' ||\n type === 'AndroidPull'\n ) {\n // not implemented in yaml yet\n } else if (\n type === 'Error' ||\n type === 'Assert' ||\n type === 'AssertWithoutThrow' ||\n type === 'Finished'\n ) {\n // do nothing\n } else {\n console.warn(\n `Cannot convert action ${type} to yaml flow. 
This should be a bug of Midscene.`,\n );\n }\n }\n\n if (sleep) {\n flow.push({\n sleep: sleep,\n });\n }\n\n return flow;\n}\n"],"names":["AIActionType","callAiFn","msgs","AIActionTypeValue","jsonObject","callToGetJSONObject","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","vlLocateMode","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rect","warned","warnGPT4oSizeLimit","size","_getModelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","sleep","flow","plan","_plan_locate","type","param"],"mappings":";;;;;;;AAoCO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQL,eAAeC,SACpBC,IAAY,EACZC,iBAA+B;IAE/B,MAAMC,aAAa,MAAMC,oBAAuBH,MAAMC;IAEtD,OAAO;QACL,SAASC,WAAW,OAAO;QAC3B,OAAOA,WAAW,KAAK;IACzB;AACF;AAEA,MAAME,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc;IAGd,IAAKF,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGG,UAAUH,OAAO,IAAI,EAAEC,OAAOC;IAG9C,OAAOF;AACT;AAEO,SAASI,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;QACN,YAAnB,OAAOS,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;KAC1B;IACD,OAAOa;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCJ,KAAa,EACbC,MAAc;IAEdU,OACEX,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOG,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;YAC5CQ,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;SAC7C;QAEH,MAAM,IAAIM,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAIH,IACE
c,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;QAEhEc,KAAK,GAAG,CACNT,OACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACNR,QACAQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIS,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAGH,MAAMI,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,IAAIkB,AAAmB,oBAAnBA,kBAAsCA,AAAmB,kBAAnBA,gBACxC,OAAOT,gBAAgBN,MAAMJ,OAAOC;IAGtC,IAAIkB,AAAmB,aAAnBA,gBACF,OAAOC,gBAAgBhB,MAAMJ,OAAOC;IAGtC,OAAOE,cAAcC;AACvB;AAEO,SAASgB,gBACdhB,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC5C,MAAMsB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC5C,MAAMsB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC7C,MAAMwB,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdyB,UAAU,CAAC,EACXC,UAAU,CAAC;IAEX/B,kBAAkB,mBAAmBQ,MAAMJ,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UAAUE,MAAMJ,OAAOC;IAC1D,MAAM2B,OAAO;QACX,MAAMP,OAAOK;QACb,KAAKJ,MAAMK;QACX,OAAOJ,QAAQF;QACf,QAAQG,SAASF;IACnB;IACA1B,kBAAkB,4BAA4BgC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU;QAEvCC;IADJ,IAAIH,QAAQ;IACZ,IAAI,QAAAG,CAAAA,gBAAAA,cAAa,IAAbA,KAAAA,IAAAA,cAAgB,WAAW,GAAG,QAAQ,CAAC,WAAW;QACpD,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,wFAAwF,CAAC;QAEhN,IACEtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBAAiBd,IAAU,EAAEe,UAAgB;IAC3D,MAAMC,cAAczB,AAAmB,oBAAnBA,iBAAqC,MAAM;IAC/D,MAAM0B,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGnB,KAAK,GAAG,CACnBmB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGnB,KAAK,GAAG,CACpBmB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,WAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBR
;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQH,MAAO;YAETI;QADf,MAAMC,OAAOF,KAAK,IAAI;QACtB,MAAM/D,SAAS,QAAAgE,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,MAAM;QAElC,IAAIC,AAAS,UAATA,MACFH,KAAK,IAAI,CAAC;YACR,OAAO9D;QACT;aACK,IAAIiE,AAAS,YAATA,MACTH,KAAK,IAAI,CAAC;YACR,SAAS9D;QACX;aACK,IAAIiE,AAAS,YAATA,MAAkB;YAC3B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,SAASI,MAAM,KAAK;gBACpBlE;YACF;QACF,OAAO,IAAIiE,AAAS,oBAATA,MAA0B;YACnC,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,iBAAiBI,MAAM,KAAK;gBAC5BlE;YACF;QACF,OAAO,IAAIiE,AAAS,aAATA,MAAmB;YAC5B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,UAAU;gBACV9D;gBACA,WAAWkE,MAAM,SAAS;gBAC1B,YAAYA,MAAM,UAAU;gBAC5B,UAAUA,MAAM,QAAQ;YAC1B;QACF,OAAO,IAAID,AAAS,YAATA,MAAkB;YAC3B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,OAAOI,MAAM,MAAM;YACrB;QACF,OACW,wBAATD,QACAA,AAAS,wBAATA,QACAA,AAAS,8BAATA,QACAA,AAAS,uBAATA,QACAA,AAAS,kBAATA,QAIAA,AAAS,YAATA,QACAA,AAAS,aAATA,QACAA,AAAS,yBAATA,QACAA,AAAS,eAATA,QAIA9B,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE8B,KAAK,gDAAgD,CAAC;IAGrF;IAEA,IAAIJ,OACFC,KAAK,IAAI,CAAC;QACR,OAAOD;IACT;IAGF,OAAOC;AACT"}
package/dist/es/ai-model/index.mjs
CHANGED
@@ -5,6 +5,6 @@ import { generatePlaywrightTest, generatePlaywrightTestStream } from "./prompt/p
 import { generateYamlTest, generateYamlTestStream } from "./prompt/yaml-generator.mjs";
 import { AiAssert, AiExtractElementInfo, AiLocateElement, AiLocateSection } from "./inspect.mjs";
 import { plan } from "./llm-planning.mjs";
-import { AIActionType,
+import { AIActionType, adaptBboxToRect, callAiFn } from "./common.mjs";
 import { resizeImageForUiTars, vlmPlanning } from "./ui-tars-planning.mjs";
-export { AIActionType, AiAssert, AiExtractElementInfo, AiLocateElement, AiLocateSection,
+export { AIActionType, AiAssert, AiExtractElementInfo, AiLocateElement, AiLocateSection, adaptBboxToRect, call as callAi, callAiFn, callAiFnWithStringResponse, callToGetJSONObject, describeUserPage, elementByPositionWithElementInfo, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, plan, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
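adaptBboxToRect and callAiFn are now re-exported from this entry. A small sketch of adaptBboxToRect, assuming no VL-specific locate mode is configured so the default adaptQwenBbox path applies (see the embedded source in the map above); the declaration below is illustrative:

```ts
// Declared here only for the sketch; the real implementation lives in
// dist/es/ai-model/common.mjs and is re-exported by the entry shown above.
declare function adaptBboxToRect(
  bbox: number[],
  width: number,
  height: number,
  offsetX?: number,
  offsetY?: number,
): { left: number; top: number; width: number; height: number };

// With no doubao/gemini/ui-tars locate mode set, the bbox is taken as
// [left, top, right, bottom], rounded, and converted to a rect.
const rect = adaptBboxToRect([100, 200, 300, 420], 1280, 720);
// => { left: 100, top: 200, width: 200, height: 220 }
console.log(rect);
```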
package/dist/es/ai-model/llm-planning.mjs
CHANGED
@@ -56,7 +56,7 @@ async function plan(userInstruction, opts) {
         actions,
         rawResponse,
         usage,
-        yamlFlow: buildYamlFlowFromPlans(actions,
+        yamlFlow: buildYamlFlowFromPlans(actions, planFromAI.sleep)
     };
     assert(planFromAI, "can't get plans from AI");
     if (vlLocateMode()) {
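The only change here is that plan() now threads the model's optional trailing sleep into buildYamlFlowFromPlans. A consumption sketch, assuming plan is in scope (it is exported from dist/es/ai-model/index.mjs per the earlier hunk); all declarations and values below are illustrative:

```ts
// Declared here only for the sketch; the real plan() is shown above.
declare function plan(
  userInstruction: string,
  opts: { context: unknown; pageType: unknown; actionSpace: unknown[] },
): Promise<{ actions: unknown[]; sleep?: number; yamlFlow?: Array<Record<string, unknown>> }>;
declare const context: unknown;        // UIContext: { screenshotBase64, tree, size }
declare const pageType: unknown;       // page type string used by the caller
declare const actionSpace: unknown[];  // DeviceAction[] describing available actions

const result = await plan('open the first search result', { context, pageType, actionSpace });

// result.yamlFlow is built as buildYamlFlowFromPlans(result.actions, result.sleep):
// if the model asked for a trailing sleep, it becomes a final { sleep: <ms> } step.
console.log(result.yamlFlow);
```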
package/dist/es/ai-model/llm-planning.mjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n PageType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n callAiFn,\n fillBboxParam,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n pageType: PageType;\n actionSpace: DeviceAction[];\n callAI?: typeof callAiFn<PlanningAIResponse>;\n log?: string;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { callAI, context } = opts || {};\n const { screenshotBase64, size } = context;\n const { description: pageDescription, elementById } =\n await describeUserPage(context);\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlLocateMode(),\n });\n const taskBackgroundContextText = generateTaskBackgroundContext(\n userInstruction,\n opts.log,\n opts.actionContext,\n );\n const userInstructionPrompt = await automationUserPrompt(\n vlLocateMode(),\n ).format({\n pageDescription,\n taskBackgroundContext: taskBackgroundContextText,\n });\n\n let imagePayload = screenshotBase64;\n if (vlLocateMode() === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode()) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n const call = callAI || callAiFn;\n const { content, usage } = await call(msgs, AIActionType.PLAN);\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(
+
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n PageType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n callAiFn,\n fillBboxParam,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n pageType: PageType;\n actionSpace: DeviceAction[];\n callAI?: typeof callAiFn<PlanningAIResponse>;\n log?: string;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { callAI, context } = opts || {};\n const { screenshotBase64, size } = context;\n const { description: pageDescription, elementById } =\n await describeUserPage(context);\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlLocateMode(),\n });\n const taskBackgroundContextText = generateTaskBackgroundContext(\n userInstruction,\n opts.log,\n opts.actionContext,\n );\n const userInstructionPrompt = await automationUserPrompt(\n vlLocateMode(),\n ).format({\n pageDescription,\n taskBackgroundContext: taskBackgroundContextText,\n });\n\n let imagePayload = screenshotBase64;\n if (vlLocateMode() === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode()) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n const call = callAI || callAiFn;\n const { content, usage } = await call(msgs, AIActionType.PLAN);\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(actions, planFromAI.sleep),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n if (vlLocateMode()) {\n actions.forEach((action) => {\n if (action.locate) {\n try {\n action.locate = fillBboxParam(action.locate, size.width, size.height);\n } catch (e) {\n throw new Error(\n `Failed to fill locate param: ${planFromAI.error} (${\n e instanceof Error ? e.message : 'unknown error'\n })`,\n {\n cause: e,\n },\n );\n }\n }\n });\n // in Qwen-VL, error means error. 
In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n } else {\n actions.forEach((action) => {\n if (action.locate?.id) {\n // The model may return indexId, need to perform a query correction to avoid exceptions\n const element = elementById(action.locate.id);\n if (element) {\n action.locate.id = element.id;\n }\n }\n });\n }\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n return returnValue;\n}\n"],"names":["plan","userInstruction","opts","_planFromAI_action","callAI","context","screenshotBase64","size","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","vlLocateMode","taskBackgroundContextText","generateTaskBackgroundContext","userInstructionPrompt","automationUserPrompt","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","msgs","call","callAiFn","content","usage","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","fillBboxParam","e","Error","_action_locate","element","console"],"mappings":";;;;;;AAyBO,eAAeA,KACpBC,eAAuB,EACvBC,IAOC;QA8DEC;IA5DH,MAAM,EAAEC,MAAM,EAAEC,OAAO,EAAE,GAAGH,QAAQ,CAAC;IACrC,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGF;IACnC,MAAM,EAAE,aAAaG,eAAe,EAAEC,WAAW,EAAE,GACjD,MAAMC,iBAAiBL;IAEzB,MAAMM,eAAe,MAAMC,2BAA2B;QACpD,aAAaV,KAAK,WAAW;QAC7B,QAAQW;IACV;IACA,MAAMC,4BAA4BC,8BAChCd,iBACAC,KAAK,GAAG,EACRA,KAAK,aAAa;IAEpB,MAAMc,wBAAwB,MAAMC,qBAClCJ,gBACA,MAAM,CAAC;QACPL;QACA,uBAAuBM;IACzB;IAEA,IAAII,eAAeZ;IACnB,IAAIO,AAAmB,cAAnBA,gBACFK,eAAe,MAAMC,4BAA4BD;SAC5C,IAAI,CAACL,gBACVK,eAAe,MAAME,kBACnBd,kBACAD,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhBgB,mBAAmBd;IAEnB,MAAMe,OAAe;QACnB;YAAE,MAAM;YAAU,SAASX;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKO;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMF;gBACR;aACD;QACH;KACD;IAED,MAAMO,OAAOnB,UAAUoB;IACvB,MAAM,EAAEC,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMH,KAAKD,MAAMK,aAAa,IAAI;IAC7D,MAAMC,cAAcC,KAAK,SAAS,CAACJ,SAASK,QAAW;IACvD,MAAMC,aAAaN;IAEnB,MAAMO,UACH7B,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAAC4B,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAF;QACA,UAAUQ,uBAAuBF,SAASD,WAAW,KAAK;IAC5D;IAEAI,OAAOJ,YAAY;IAEnB,IAAIlB,gBAAgB;QAClBmB,QAAQ,OAAO,CAAC,CAACI;YACf,IAAIA,OAAO,MAAM,EACf,IAAI;gBACFA,OAAO,MAAM,GAAGC,cAAcD,OAAO,MAAM,EAAE7B,KAAK,KAAK,EAAEA,KAAK,MAAM;YACtE,EAAE,OAAO+B,GAAG;gBACV,MAAM,IAAIC,MACR,CAAC,6BAA6B,EAAER,WAAW,KAAK,CAAC,EAAE,EACjDO,aAAaC,QAAQD,EAAE,OAAO,GAAG,gBAClC,CAAC,CAAC,EACH;oBACE,OAAOA;gBACT;YAEJ;QAEJ;QAEAH,OAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IACzE,OACEC,QAAQ,OAAO,CAAC,CAACI;YACXI;QAAJ,IAAI,QAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,EAAE,EAAE;YAErB,MAAMC,UAAUhC,YAAY2B,OAAO,MAAM,CAAC,EAAE;YAC5C,IAAIK,SACFL,OAAO,MAAM,CAAC,EAAE,GAAGK,QAAQ,EAAE;QAEjC;IACF;IAGF,IACET,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBS,QAAQ,IAAI,CACV,8EACAzC;IAIJ,OAAOgC;AACT"}
package/dist/es/types.mjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"types.mjs","sources":["webpack://@midscene/core/./src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n}\n\n/**\n * insight\n */\n\nexport type CallAIFn = <T>(\n messages: ChatCompletionMessageParam[],\n) => Promise<T>;\n\nexport interface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: CallAIFn;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n id: string;\n indexId?: number;\n center: [number, number];\n rect: Rect;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n 
rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n model_name: string;\n model_description?: string;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type:\n | 'Locate'\n | 'Tap'\n | 'RightClick'\n | 'Hover'\n | 'Drag'\n | 'Input'\n | 'KeyboardPress'\n | 'Scroll'\n | 'Error'\n | 'Assert'\n | 'AssertWithoutThrow'\n | 'Sleep'\n | 'Finished'\n | 'AndroidBackButton'\n | 'AndroidHomeButton'\n | 'AndroidRecentAppsButton'\n | 'AndroidLongPress'\n | 'AndroidPull';\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamAssert {\n assertion: TUserPrompt;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {\n assertion: string;\n};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n ignoreMarker?: 
boolean;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n pageContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? 
TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. 
interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n executions: ExecutionDump[];\n}\n\nexport type PageType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android';\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport type TMultimodalPrompt = {\n /**\n * Support use image to inspect elements.\n * The \"images\" field is an object that uses image name as key and image url as value.\n * The image url can be a local path, a http link , or a base64 string.\n */\n images?: {\n name: string;\n url: string;\n }[];\n /**\n * By default, the image url in the \"images\" filed starts with `https://` or `http://` will be directly sent to the LLM.\n * In case the images are not accessible to the LLM (One common case is that image url is internal network only.), you can enable this option.\n * Then image will be download and convert to base64 format.\n */\n convertHttpImage2Base64?: boolean;\n};\n\nexport type TUserPrompt =\n | string\n | ({\n prompt: string;\n } & Partial<TMultimodalPrompt>);\n\nexport interface DeviceAction<ParamType = any> {\n name: string;\n interfaceAlias?: string;\n description?: string;\n paramSchema?: string;\n paramDescription?: string;\n location?: 'required' | 'optional' | false;\n whatToLocate?: string; // what to locate if location is required or optional\n call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;\n}\n"],"names":["AIResponseFormat","UIContext"],"mappings":";AAgCO,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAwFL,MAAeC;AAMtB"}
|
|
1
|
+
{"version":3,"file":"types.mjs","sources":["webpack://@midscene/core/./src/types.ts"],"sourcesContent":["/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n}\n\n/**\n * insight\n */\n\nexport type CallAIFn = <T>(\n messages: ChatCompletionMessageParam[],\n) => Promise<T>;\n\nexport interface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: CallAIFn;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n id: string;\n indexId?: number;\n center: [number, number];\n rect: Rect;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n 
rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n model_name: string;\n model_description?: string;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type:\n | 'Locate'\n | 'Tap'\n | 'RightClick'\n | 'Hover'\n | 'Drag'\n | 'Input'\n | 'KeyboardPress'\n | 'Scroll'\n | 'Error'\n | 'Assert'\n | 'AssertWithoutThrow'\n | 'Sleep'\n | 'Finished'\n | 'AndroidBackButton'\n | 'AndroidHomeButton'\n | 'AndroidRecentAppsButton'\n | 'AndroidLongPress'\n | 'AndroidPull';\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamAssert {\n assertion: TUserPrompt;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {\n assertion: string;\n};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n ignoreMarker?: 
boolean;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n pageContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? 
TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. 
interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n executions: ExecutionDump[];\n}\n\nexport type PageType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android';\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport type TMultimodalPrompt = {\n /**\n * Support use image to inspect elements.\n * The \"images\" field is an object that uses image name as key and image url as value.\n * The image url can be a local path, a http link , or a base64 string.\n */\n images?: {\n name: string;\n url: string;\n }[];\n /**\n * By default, the image url in the \"images\" filed starts with `https://` or `http://` will be directly sent to the LLM.\n * In case the images are not accessible to the LLM (One common case is that image url is internal network only.), you can enable this option.\n * Then image will be download and convert to base64 format.\n */\n convertHttpImage2Base64?: boolean;\n};\n\nexport type TUserPrompt =\n | string\n | ({\n prompt: string;\n } & Partial<TMultimodalPrompt>);\n\nexport interface DeviceAction<ParamType = any> {\n name: string;\n description?: string;\n paramSchema?: string;\n paramDescription?: string;\n location?: 'required' | 'optional' | false;\n whatToLocate?: string; // what to locate if location is required or optional\n call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;\n}\n"],"names":["AIResponseFormat","UIContext"],"mappings":";AAgCO,IAAKA,yBAAgBA,WAAAA,GAAAA,SAAhBA,gBAAgB;;;WAAhBA;;AAwFL,MAAeC;AAMtB"}
|