@midscene/core 0.26.7-beta-20250815153024.0 → 0.26.7-beta-20250818025746.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/common.mjs +37 -16
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +2 -2
- package/dist/es/ai-model/llm-planning.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/ai-model/common.js +36 -18
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +1 -4
- package/dist/lib/ai-model/llm-planning.js +1 -1
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/common.d.ts +2 -3
- package/dist/types/ai-model/index.d.ts +0 -1
- package/dist/types/types.d.ts +0 -1
- package/dist/types/yaml.d.ts +6 -5
- package/package.json +3 -3
|
@@ -24,7 +24,6 @@ var __webpack_require__ = {};
|
|
|
24
24
|
var __webpack_exports__ = {};
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
-
actionSpaceTypePrefix: ()=>actionSpaceTypePrefix,
|
|
28
27
|
markupImageForLLM: ()=>markupImageForLLM,
|
|
29
28
|
adaptBboxToRect: ()=>adaptBboxToRect,
|
|
30
29
|
expandSearchArea: ()=>expandSearchArea,
|
|
@@ -54,7 +53,6 @@ var common_AIActionType = /*#__PURE__*/ function(AIActionType) {
|
|
|
54
53
|
AIActionType[AIActionType["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
|
|
55
54
|
return AIActionType;
|
|
56
55
|
}({});
|
|
57
|
-
const actionSpaceTypePrefix = 'action_space_';
|
|
58
56
|
async function callAiFn(msgs, AIActionTypeValue) {
|
|
59
57
|
const jsonObject = await (0, index_js_namespaceObject.callToGetJSONObject)(msgs, AIActionTypeValue);
|
|
60
58
|
return {
|
|
@@ -211,31 +209,52 @@ async function markupImageForLLM(screenshotBase64, tree, size) {
|
|
|
211
209
|
});
|
|
212
210
|
return imagePayload;
|
|
213
211
|
}
|
|
214
|
-
function buildYamlFlowFromPlans(plans,
|
|
212
|
+
function buildYamlFlowFromPlans(plans, sleep) {
|
|
215
213
|
const flow = [];
|
|
216
214
|
for (const plan of plans){
|
|
217
215
|
var _plan_locate;
|
|
218
|
-
const
|
|
219
|
-
const action = actionSpace.find((action)=>action.name === verb);
|
|
220
|
-
if (!action) {
|
|
221
|
-
console.warn(`Cannot convert action ${verb} to yaml flow. Will ignore it.`);
|
|
222
|
-
continue;
|
|
223
|
-
}
|
|
216
|
+
const type = plan.type;
|
|
224
217
|
const locate = null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.prompt;
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
218
|
+
if ('Tap' === type) flow.push({
|
|
219
|
+
aiTap: locate
|
|
220
|
+
});
|
|
221
|
+
else if ('Hover' === type) flow.push({
|
|
222
|
+
aiHover: locate
|
|
223
|
+
});
|
|
224
|
+
else if ('Input' === type) {
|
|
225
|
+
const param = plan.param;
|
|
226
|
+
flow.push({
|
|
227
|
+
aiInput: param.value,
|
|
228
|
+
locate
|
|
229
|
+
});
|
|
230
|
+
} else if ('KeyboardPress' === type) {
|
|
231
|
+
const param = plan.param;
|
|
232
|
+
flow.push({
|
|
233
|
+
aiKeyboardPress: param.value,
|
|
234
|
+
locate
|
|
235
|
+
});
|
|
236
|
+
} else if ('Scroll' === type) {
|
|
237
|
+
const param = plan.param;
|
|
238
|
+
flow.push({
|
|
239
|
+
aiScroll: null,
|
|
240
|
+
locate,
|
|
241
|
+
direction: param.direction,
|
|
242
|
+
scrollType: param.scrollType,
|
|
243
|
+
distance: param.distance
|
|
244
|
+
});
|
|
245
|
+
} else if ('Sleep' === type) {
|
|
246
|
+
const param = plan.param;
|
|
247
|
+
flow.push({
|
|
248
|
+
sleep: param.timeMs
|
|
249
|
+
});
|
|
250
|
+
} else 'AndroidBackButton' === type || 'AndroidHomeButton' === type || 'AndroidRecentAppsButton' === type || 'AndroidLongPress' === type || 'AndroidPull' === type || 'Error' === type || 'Assert' === type || 'AssertWithoutThrow' === type || 'Finished' === type || console.warn(`Cannot convert action ${type} to yaml flow. This should be a bug of Midscene.`);
|
|
231
251
|
}
|
|
232
252
|
if (sleep) flow.push({
|
|
233
|
-
sleep
|
|
253
|
+
sleep: sleep
|
|
234
254
|
});
|
|
235
255
|
return flow;
|
|
236
256
|
}
|
|
237
257
|
exports.AIActionType = __webpack_exports__.AIActionType;
|
|
238
|
-
exports.actionSpaceTypePrefix = __webpack_exports__.actionSpaceTypePrefix;
|
|
239
258
|
exports.adaptBbox = __webpack_exports__.adaptBbox;
|
|
240
259
|
exports.adaptBboxToRect = __webpack_exports__.adaptBboxToRect;
|
|
241
260
|
exports.adaptDoubaoBbox = __webpack_exports__.adaptDoubaoBbox;
|
|
@@ -250,7 +269,6 @@ exports.mergeRects = __webpack_exports__.mergeRects;
|
|
|
250
269
|
exports.warnGPT4oSizeLimit = __webpack_exports__.warnGPT4oSizeLimit;
|
|
251
270
|
for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
252
271
|
"AIActionType",
|
|
253
|
-
"actionSpaceTypePrefix",
|
|
254
272
|
"adaptBbox",
|
|
255
273
|
"adaptBboxToRect",
|
|
256
274
|
"adaptDoubaoBbox",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/common.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n call,\n callToGetJSONObject,\n getModelName,\n} from './service-caller/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nexport const actionSpaceTypePrefix = 'action_space_';\n\nexport async function callAiFn<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const jsonObject = await callToGetJSONObject<T>(msgs, AIActionTypeValue);\n\n return {\n content: jsonObject.content,\n usage: jsonObject.usage,\n };\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlLocateMode() === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(bbox, width, height);\n const rect = {\n left: left + offsetX,\n top: top + offsetY,\n width: right - left,\n height: bottom - top,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size) {\n if (warned) return;\n if (getModelName()?.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your page to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(rect: Rect, screenSize: Size) {\n const minEdgeSize = vlLocateMode() === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const locate = plan.locate?.prompt;\n const flowKey = action.interfaceAlias || `${actionSpaceTypePrefix}${verb}`;\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: locate || '',\n ...(plan.param || {}),\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","AIActionType","actionSpaceTypePrefix","callAiFn","msgs","AIActionTypeValue","jsonObject","callToGetJSONObject","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","vlLocateMode","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rect","warned","warnGPT4oSizeLimit","size","_getModelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","_plan_locate","verb","action","flowKey","flowItem"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;AC4BO,IAAKI,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQL,MAAMC,wBAAwB;AAE9B,eAAeC,SACpBC,IAAY,EACZC,iBAA+B;IAE/B,MAAMC,aAAa,MAAMC,AAAAA,IAAAA,yBAAAA,mBAAAA,AAAAA,EAAuBH,MAAMC;IAEtD,OAAO;QACL,SAASC,WAAW,OAAO;QAC3B,OAAOA,WAAW,KAAK;IACzB;AACF;AAEA,MAAME,kBAAkB;AACxB,MAAMC,oBAAoBC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc;IAGd,IAAKF,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGG,UAAUH,OAAO,IAAI,EAAEC,OAAOC;IAG9C,OAAOF;AACT;AAEO,SAASI,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;QACN,YAAnB,OAAOS,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;KAC1B;IACD,OAAOa;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCJ,KAAa,EACbC,MAAc;IAEdU,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOG,MAAmB;QAC5BO,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;YAC5CQ,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;SAC7C;QAEH,MAAM,IAAIM,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAIH,IACEc,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;QAEhEc,KAAK,GAAG,CACNT,OACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACNR,QACAQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIS,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAGH,MAAMI,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,IAAIkB,AAAmB,oBAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,OAAsCA,AAAmB,kBAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACxC,OAAOT,gBAAgBN,MAAMJ,OAAOC;IAGtC,IAAIkB,AAAmB,aAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACF,OAAOC,gBAAgBhB,MAAMJ,OAAOC;IAGtC,OAAOE,cAAcC;AACvB;AAEO,SAASgB,gBACdhB,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC5C,MAAMsB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC5C,MAAMsB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC7C,MAAMwB,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdyB,UAAU,CAAC,EACXC,UAAU,CAAC;IAEX/B,kBAAkB,mBAAmBQ,MAAMJ,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UAAUE,MAAMJ,OAAOC;IAC1D,MAAM2B,OAAO;QACX,MAAMP,OAAOK;QACb,KAAKJ,MAAMK;QACX,OAAOJ,QAAQF;QACf,QAAQG,SAASF;IACnB;IACA1B,kBAAkB,4BAA4BgC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU;QAEvCC;IADJ,IAAIH,QAAQ;IACZ,IAAI,QAAAG,CAAAA,gBAAAA,AAAAA,IAAAA,yBAAAA,YAAAA,AAAAA,GAAa,IAAbA,KAAAA,IAAAA,cAAgB,WAAW,GAAG,QAAQ,CAAC,WAAW;QACpD,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,wFAAwF,CAAC;QAEhN,IACEtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBAAiBd,IAAU,EAAEe,UAAgB;IAC3D,MAAMC,cAAczB,AAAmB,oBAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,MAAqC,MAAM;IAC/D,MAAM0B,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGnB,KAAK,GAAG,CACnBmB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGnB,KAAK,GAAG,CACpBmB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,AAAAA,IAAAA,0BAAAA,UAAAA,AAAAA,EAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,0BAAAA,QAAAA,CAAAA,IAAa,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,uBAAAA,AAAAA,EAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAA2B,EAC3BC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;YAWTK;QAVf,MAAMC,OAAOF,KAAK,IAAI;QAEtB,MAAMG,SAASN,YAAY,IAAI,CAAC,CAACM,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXhC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE+B,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMlE,SAAS,QAAAiE,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,MAAM;QAClC,MAAMG,UAAUD,OAAO,cAAc,IAAI,GAAG7E,wBAAwB4E,MAAM;QAE1E,MAAMG,WAAiC;YACrC,CAACD,QAAQ,EAAEpE,UAAU;YACrB,GAAIgE,KAAK,KAAK,IAAI,CAAC,CAAC;QACtB;QAEAD,KAAK,IAAI,CAACM;IACZ;IAEA,IAAIP,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT"}
|
|
1
|
+
{"version":3,"file":"ai-model/common.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIUsageInfo,\n BaseElement,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n PlanningActionParamInputOrKeyPress,\n PlanningActionParamSleep,\n Rect,\n ScrollParam,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n call,\n callToGetJSONObject,\n getModelName,\n} from './service-caller/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nexport async function callAiFn<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const jsonObject = await callToGetJSONObject<T>(msgs, AIActionTypeValue);\n\n return {\n content: jsonObject.content,\n usage: jsonObject.usage,\n };\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlLocateMode() === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(bbox, width, height);\n const rect = {\n left: left + offsetX,\n top: top + offsetY,\n width: right - left,\n height: bottom - top,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size) {\n if (warned) return;\n if (getModelName()?.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your page to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(rect: Rect, screenSize: Size) {\n const minEdgeSize = vlLocateMode() === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const type = plan.type;\n const locate = plan.locate?.prompt!; // TODO: check if locate is null\n\n if (type === 'Tap') {\n flow.push({\n aiTap: locate!,\n });\n } else if (type === 'Hover') {\n flow.push({\n aiHover: locate!,\n });\n } else if (type === 'Input') {\n const param = plan.param as PlanningActionParamInputOrKeyPress;\n flow.push({\n aiInput: param.value,\n locate,\n });\n } else if (type === 'KeyboardPress') {\n const param = plan.param as PlanningActionParamInputOrKeyPress;\n flow.push({\n aiKeyboardPress: param.value,\n locate,\n });\n } else if (type === 'Scroll') {\n const param = plan.param as ScrollParam;\n flow.push({\n aiScroll: null,\n locate,\n direction: param.direction,\n scrollType: param.scrollType,\n distance: param.distance,\n });\n } else if (type === 'Sleep') {\n const param = plan.param as PlanningActionParamSleep;\n flow.push({\n sleep: param.timeMs,\n });\n } else if (\n type === 'AndroidBackButton' ||\n type === 'AndroidHomeButton' ||\n type === 'AndroidRecentAppsButton' ||\n type === 'AndroidLongPress' ||\n type === 'AndroidPull'\n ) {\n // not implemented in yaml yet\n } else if (\n type === 'Error' ||\n type === 'Assert' ||\n type === 'AssertWithoutThrow' ||\n type === 'Finished'\n ) {\n // do nothing\n } else {\n console.warn(\n `Cannot convert action ${type} to yaml flow. This should be a bug of Midscene.`,\n );\n }\n }\n\n if (sleep) {\n flow.push({\n sleep: sleep,\n });\n }\n\n return flow;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","AIActionType","callAiFn","msgs","AIActionTypeValue","jsonObject","callToGetJSONObject","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","vlLocateMode","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rect","warned","warnGPT4oSizeLimit","size","_getModelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","sleep","flow","plan","_plan_locate","type","param"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;AC8BO,IAAKI,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQL,eAAeC,SACpBC,IAAY,EACZC,iBAA+B;IAE/B,MAAMC,aAAa,MAAMC,AAAAA,IAAAA,yBAAAA,mBAAAA,AAAAA,EAAuBH,MAAMC;IAEtD,OAAO;QACL,SAASC,WAAW,OAAO;QAC3B,OAAOA,WAAW,KAAK;IACzB;AACF;AAEA,MAAME,kBAAkB;AACxB,MAAMC,oBAAoBC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc;IAGd,IAAKF,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGG,UAAUH,OAAO,IAAI,EAAEC,OAAOC;IAG9C,OAAOF;AACT;AAEO,SAASI,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;QACN,YAAnB,OAAOS,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;KAC1B;IACD,OAAOa;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCJ,KAAa,EACbC,MAAc;IAEdU,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOG,MAAmB;QAC5BO,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;YAC5CQ,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;SAC7C;QAEH,MAAM,IAAIM,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAIH,IACEc,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;QAEhEc,KAAK,GAAG,CACNT,OACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACNR,QACAQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIS,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAGH,MAAMI,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,IAAIkB,AAAmB,oBAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,OAAsCA,AAAmB,kBAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACxC,OAAOT,gBAAgBN,MAAMJ,OAAOC;IAGtC,IAAIkB,AAAmB,aAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACF,OAAOC,gBAAgBhB,MAAMJ,OAAOC;IAGtC,OAAOE,cAAcC;AACvB;AAEO,SAASgB,gBACdhB,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC5C,MAAMsB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC5C,MAAMsB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC7C,MAAMwB,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdyB,UAAU,CAAC,EACXC,UAAU,CAAC;IAEX/B,kBAAkB,mBAAmBQ,MAAMJ,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UAAUE,MAAMJ,OAAOC;IAC1D,MAAM2B,OAAO;QACX,MAAMP,OAAOK;QACb,KAAKJ,MAAMK;QACX,OAAOJ,QAAQF;QACf,QAAQG,SAASF;IACnB;IACA1B,kBAAkB,4BAA4BgC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU;QAEvCC;IADJ,IAAIH,QAAQ;IACZ,IAAI,QAAAG,CAAAA,gBAAAA,AAAAA,IAAAA,yBAAAA,YAAAA,AAAAA,GAAa,IAAbA,KAAAA,IAAAA,cAAgB,WAAW,GAAG,QAAQ,CAAC,WAAW;QACpD,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,wFAAwF,CAAC;QAEhN,IACEtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBAAiBd,IAAU,EAAEe,UAAgB;IAC3D,MAAMC,cAAczB,AAAmB,oBAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,MAAqC,MAAM;IAC/D,MAAM0B,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGnB,KAAK,GAAG,CACnBmB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGnB,KAAK,GAAG,CACpBmB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,AAAAA,IAAAA,0BAAAA,UAAAA,AAAAA,EAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,0BAAAA,QAAAA,CAAAA,IAAa,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,uBAAAA,AAAAA,EAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQH,MAAO;YAETI;QADf,MAAMC,OAAOF,KAAK,IAAI;QACtB,MAAM/D,SAAS,QAAAgE,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,MAAM;QAElC,IAAIC,AAAS,UAATA,MACFH,KAAK,IAAI,CAAC;YACR,OAAO9D;QACT;aACK,IAAIiE,AAAS,YAATA,MACTH,KAAK,IAAI,CAAC;YACR,SAAS9D;QACX;aACK,IAAIiE,AAAS,YAATA,MAAkB;YAC3B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,SAASI,MAAM,KAAK;gBACpBlE;YACF;QACF,OAAO,IAAIiE,AAAS,oBAATA,MAA0B;YACnC,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,iBAAiBI,MAAM,KAAK;gBAC5BlE;YACF;QACF,OAAO,IAAIiE,AAAS,aAATA,MAAmB;YAC5B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,UAAU;gBACV9D;gBACA,WAAWkE,MAAM,SAAS;gBAC1B,YAAYA,MAAM,UAAU;gBAC5B,UAAUA,MAAM,QAAQ;YAC1B;QACF,OAAO,IAAID,AAAS,YAATA,MAAkB;YAC3B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,OAAOI,MAAM,MAAM;YACrB;QACF,OACW,wBAATD,QACAA,AAAS,wBAATA,QACAA,AAAS,8BAATA,QACAA,AAAS,uBAATA,QACAA,AAAS,kBAATA,QAIAA,AAAS,YAATA,QACAA,AAAS,aAATA,QACAA,AAAS,yBAATA,QACAA,AAAS,eAATA,QAIA9B,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE8B,KAAK,gDAAgD,CAAC;IAGrF;IAEA,IAAIJ,OACFC,KAAK,IAAI,CAAC;QACR,OAAOD;IACT;IAGF,OAAOC;AACT"}
|
|
@@ -29,8 +29,8 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
29
29
|
generateYamlTest: ()=>yaml_generator_js_namespaceObject.generateYamlTest,
|
|
30
30
|
elementByPositionWithElementInfo: ()=>util_js_namespaceObject.elementByPositionWithElementInfo,
|
|
31
31
|
AiLocateSection: ()=>external_inspect_js_namespaceObject.AiLocateSection,
|
|
32
|
-
AIActionType: ()=>external_common_js_namespaceObject.AIActionType,
|
|
33
32
|
callAi: ()=>index_js_namespaceObject.call,
|
|
33
|
+
AIActionType: ()=>external_common_js_namespaceObject.AIActionType,
|
|
34
34
|
generatePlaywrightTest: ()=>playwright_generator_js_namespaceObject.generatePlaywrightTest,
|
|
35
35
|
generateYamlTestStream: ()=>yaml_generator_js_namespaceObject.generateYamlTestStream,
|
|
36
36
|
AiExtractElementInfo: ()=>external_inspect_js_namespaceObject.AiExtractElementInfo,
|
|
@@ -38,7 +38,6 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
38
38
|
systemPromptToLocateElement: ()=>llm_locator_js_namespaceObject.systemPromptToLocateElement,
|
|
39
39
|
vlmPlanning: ()=>external_ui_tars_planning_js_namespaceObject.vlmPlanning,
|
|
40
40
|
AiAssert: ()=>external_inspect_js_namespaceObject.AiAssert,
|
|
41
|
-
actionSpaceTypePrefix: ()=>external_common_js_namespaceObject.actionSpaceTypePrefix,
|
|
42
41
|
adaptBboxToRect: ()=>external_common_js_namespaceObject.adaptBboxToRect,
|
|
43
42
|
AiLocateElement: ()=>external_inspect_js_namespaceObject.AiLocateElement,
|
|
44
43
|
callAiFn: ()=>external_common_js_namespaceObject.callAiFn,
|
|
@@ -60,7 +59,6 @@ exports.AiAssert = __webpack_exports__.AiAssert;
|
|
|
60
59
|
exports.AiExtractElementInfo = __webpack_exports__.AiExtractElementInfo;
|
|
61
60
|
exports.AiLocateElement = __webpack_exports__.AiLocateElement;
|
|
62
61
|
exports.AiLocateSection = __webpack_exports__.AiLocateSection;
|
|
63
|
-
exports.actionSpaceTypePrefix = __webpack_exports__.actionSpaceTypePrefix;
|
|
64
62
|
exports.adaptBboxToRect = __webpack_exports__.adaptBboxToRect;
|
|
65
63
|
exports.callAi = __webpack_exports__.callAi;
|
|
66
64
|
exports.callAiFn = __webpack_exports__.callAiFn;
|
|
@@ -82,7 +80,6 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
|
82
80
|
"AiExtractElementInfo",
|
|
83
81
|
"AiLocateElement",
|
|
84
82
|
"AiLocateSection",
|
|
85
|
-
"actionSpaceTypePrefix",
|
|
86
83
|
"adaptBboxToRect",
|
|
87
84
|
"callAi",
|
|
88
85
|
"callAiFn",
|
|
@@ -84,7 +84,7 @@ async function plan(userInstruction, opts) {
|
|
|
84
84
|
actions,
|
|
85
85
|
rawResponse,
|
|
86
86
|
usage,
|
|
87
|
-
yamlFlow: (0, external_common_js_namespaceObject.buildYamlFlowFromPlans)(actions,
|
|
87
|
+
yamlFlow: (0, external_common_js_namespaceObject.buildYamlFlowFromPlans)(actions, planFromAI.sleep)
|
|
88
88
|
};
|
|
89
89
|
(0, utils_namespaceObject.assert)(planFromAI, "can't get plans from AI");
|
|
90
90
|
if ((0, env_namespaceObject.vlLocateMode)()) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n PageType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n callAiFn,\n fillBboxParam,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n pageType: PageType;\n actionSpace: DeviceAction[];\n callAI?: typeof callAiFn<PlanningAIResponse>;\n log?: string;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { callAI, context } = opts || {};\n const { screenshotBase64, size } = context;\n const { description: pageDescription, elementById } =\n await describeUserPage(context);\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlLocateMode(),\n });\n const taskBackgroundContextText = generateTaskBackgroundContext(\n userInstruction,\n opts.log,\n opts.actionContext,\n );\n const userInstructionPrompt = await automationUserPrompt(\n vlLocateMode(),\n ).format({\n pageDescription,\n taskBackgroundContext: taskBackgroundContextText,\n });\n\n let imagePayload = screenshotBase64;\n if (vlLocateMode() === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode()) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n const call = callAI || callAiFn;\n const { content, usage } = await call(msgs, AIActionType.PLAN);\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n PageType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n callAiFn,\n fillBboxParam,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n pageType: PageType;\n actionSpace: DeviceAction[];\n callAI?: typeof callAiFn<PlanningAIResponse>;\n log?: string;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { callAI, context } = opts || {};\n const { screenshotBase64, size } = context;\n const { description: pageDescription, elementById } =\n await describeUserPage(context);\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlLocateMode(),\n });\n const taskBackgroundContextText = generateTaskBackgroundContext(\n userInstruction,\n opts.log,\n opts.actionContext,\n );\n const userInstructionPrompt = await automationUserPrompt(\n vlLocateMode(),\n ).format({\n pageDescription,\n taskBackgroundContext: taskBackgroundContextText,\n });\n\n let imagePayload = screenshotBase64;\n if (vlLocateMode() === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode()) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n const call = callAI || callAiFn;\n const { content, usage } = await call(msgs, AIActionType.PLAN);\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(actions, planFromAI.sleep),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n if (vlLocateMode()) {\n actions.forEach((action) => {\n if (action.locate) {\n try {\n action.locate = fillBboxParam(action.locate, size.width, size.height);\n } catch (e) {\n throw new Error(\n `Failed to fill locate param: ${planFromAI.error} (${\n e instanceof Error ? e.message : 'unknown error'\n })`,\n {\n cause: e,\n },\n );\n }\n }\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n } else {\n actions.forEach((action) => {\n if (action.locate?.id) {\n // The model may return indexId, need to perform a query correction to avoid exceptions\n const element = elementById(action.locate.id);\n if (element) {\n action.locate.id = element.id;\n }\n }\n });\n }\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","plan","userInstruction","opts","_planFromAI_action","callAI","context","screenshotBase64","size","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","vlLocateMode","taskBackgroundContextText","generateTaskBackgroundContext","userInstructionPrompt","automationUserPrompt","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","msgs","call","callAiFn","content","usage","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","fillBboxParam","e","Error","_action_locate","element","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACmBO,eAAeI,KACpBC,eAAuB,EACvBC,IAOC;QA8DEC;IA5DH,MAAM,EAAEC,MAAM,EAAEC,OAAO,EAAE,GAAGH,QAAQ,CAAC;IACrC,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGF;IACnC,MAAM,EAAE,aAAaG,eAAe,EAAEC,WAAW,EAAE,GACjD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBL;IAEzB,MAAMM,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaV,KAAK,WAAW;QAC7B,QAAQW,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA;IACV;IACA,MAAMC,4BAA4BC,AAAAA,IAAAA,gCAAAA,6BAAAA,AAAAA,EAChCd,iBACAC,KAAK,GAAG,EACRA,KAAK,aAAa;IAEpB,MAAMc,wBAAwB,MAAMC,AAAAA,IAAAA,gCAAAA,oBAAAA,AAAAA,EAClCJ,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACA,MAAM,CAAC;QACPL;QACA,uBAAuBM;IACzB;IAEA,IAAII,eAAeZ;IACnB,IAAIO,AAAmB,cAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACFK,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACL,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACVK,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBd,kBACAD,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhBgB,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBd;IAEnB,MAAMe,OAAe;QACnB;YAAE,MAAM;YAAU,SAASX;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKO;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMF;gBACR;aACD;QACH;KACD;IAED,MAAMO,OAAOnB,UAAUoB,mCAAAA,QAAQA;IAC/B,MAAM,EAAEC,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMH,KAAKD,MAAMK,mCAAAA,YAAAA,CAAAA,IAAiB;IAC7D,MAAMC,cAAcC,KAAK,SAAS,CAACJ,SAASK,QAAW;IACvD,MAAMC,aAAaN;IAEnB,MAAMO,UACH7B,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAAC4B,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAF;QACA,UAAUQ,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBF,SAASD,WAAW,KAAK;IAC5D;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOJ,YAAY;IAEnB,IAAIlB,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KAAgB;QAClBmB,QAAQ,OAAO,CAAC,CAACI;YACf,IAAIA,OAAO,MAAM,EACf,IAAI;gBACFA,OAAO,MAAM,GAAGC,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EAAcD,OAAO,MAAM,EAAE7B,KAAK,KAAK,EAAEA,KAAK,MAAM;YACtE,EAAE,OAAO+B,GAAG;gBACV,MAAM,IAAIC,MACR,CAAC,6BAA6B,EAAER,WAAW,KAAK,CAAC,EAAE,EACjDO,aAAaC,QAAQD,EAAE,OAAO,GAAG,gBAClC,CAAC,CAAC,EACH;oBACE,OAAOA;gBACT;YAEJ;QAEJ;QAEAH,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IACzE,OACEC,QAAQ,OAAO,CAAC,CAACI;YACXI;QAAJ,IAAI,QAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,EAAE,EAAE;YAErB,MAAMC,UAAUhC,YAAY2B,OAAO,MAAM,CAAC,EAAE;YAC5C,IAAIK,SACFL,OAAO,MAAM,CAAC,EAAE,GAAGK,QAAQ,EAAE;QAEjC;IACF;IAGF,IACET,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBS,QAAQ,IAAI,CACV,8EACAzC;IAIJ,OAAOgC;AACT"}
|
package/dist/lib/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sources":["webpack://@midscene/core/webpack/runtime/compat_get_default_export","webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/types.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n}\n\n/**\n * insight\n */\n\nexport type CallAIFn = <T>(\n messages: ChatCompletionMessageParam[],\n) => Promise<T>;\n\nexport interface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: CallAIFn;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n id: string;\n indexId?: number;\n center: [number, number];\n rect: Rect;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n model_name: string;\n model_description?: string;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type:\n | 'Locate'\n | 'Tap'\n | 'RightClick'\n | 'Hover'\n | 'Drag'\n | 'Input'\n | 'KeyboardPress'\n | 'Scroll'\n | 'Error'\n | 'Assert'\n | 'AssertWithoutThrow'\n | 'Sleep'\n | 'Finished'\n | 'AndroidBackButton'\n | 'AndroidHomeButton'\n | 'AndroidRecentAppsButton'\n | 'AndroidLongPress'\n | 'AndroidPull';\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamAssert {\n assertion: TUserPrompt;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {\n assertion: string;\n};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n ignoreMarker?: boolean;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n pageContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n executions: ExecutionDump[];\n}\n\nexport type PageType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android';\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport type TMultimodalPrompt = {\n /**\n * Support use image to inspect elements.\n * The \"images\" field is an object that uses image name as key and image url as value.\n * The image url can be a local path, a http link , or a base64 string.\n */\n images?: {\n name: string;\n url: string;\n }[];\n /**\n * By default, the image url in the \"images\" filed starts with `https://` or `http://` will be directly sent to the LLM.\n * In case the images are not accessible to the LLM (One common case is that image url is internal network only.), you can enable this option.\n * Then image will be download and convert to base64 format.\n */\n convertHttpImage2Base64?: boolean;\n};\n\nexport type TUserPrompt =\n | string\n | ({\n prompt: string;\n } & Partial<TMultimodalPrompt>);\n\nexport interface DeviceAction<ParamType = any> {\n name: string;\n interfaceAlias?: string;\n description?: string;\n paramSchema?: string;\n paramDescription?: string;\n location?: 'required' | 'optional' | false;\n whatToLocate?: string; // what to locate if location is required or optional\n call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","AIResponseFormat","UIContext"],"mappings":";;;;;;;;;;;;;;;;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;IC0BO,IAAKI,mBAAAA,WAAAA,GAAAA,SAAAA,gBAAgB;;;eAAhBA;;IAwFL,MAAeC;IAMtB"}
|
|
1
|
+
{"version":3,"file":"types.js","sources":["webpack://@midscene/core/webpack/runtime/compat_get_default_export","webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/types.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","/* eslint-disable @typescript-eslint/no-explicit-any */\n\nimport type { NodeType } from '@midscene/shared/constants';\nimport type {\n BaseElement,\n ElementTreeNode,\n Rect,\n Size,\n} from '@midscene/shared/types';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';\n\nexport type {\n ElementTreeNode,\n BaseElement,\n Rect,\n Size,\n Point,\n} from '@midscene/shared/types';\nexport * from './yaml';\n\nexport type AIUsageInfo = Record<string, any> & {\n prompt_tokens: number | undefined;\n completion_tokens: number | undefined;\n total_tokens: number | undefined;\n time_cost: number | undefined;\n};\n\n/**\n * openai\n *\n */\nexport enum AIResponseFormat {\n JSON = 'json_object',\n TEXT = 'text',\n}\n\nexport type AISingleElementResponseById = {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n};\n\nexport type AISingleElementResponseByPosition = {\n position?: {\n x: number;\n y: number;\n };\n bbox?: [number, number, number, number];\n reason: string;\n text: string;\n};\n\nexport type AISingleElementResponse = AISingleElementResponseById;\nexport interface AIElementLocatorResponse {\n elements: {\n id: string;\n reason?: string;\n text?: string;\n xpaths?: string[];\n }[];\n bbox?: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport interface AIElementCoordinatesResponse {\n bbox: [number, number, number, number];\n isOrderSensitive?: boolean;\n errors?: string[];\n}\n\nexport type AIElementResponse =\n | AIElementLocatorResponse\n | AIElementCoordinatesResponse;\n\nexport interface AIDataExtractionResponse<DataDemand> {\n data: DataDemand;\n errors?: string[];\n thought?: string;\n}\n\nexport interface AISectionLocatorResponse {\n bbox: [number, number, number, number];\n references_bbox?: [number, number, number, number][];\n error?: string;\n}\n\nexport interface AIAssertionResponse {\n pass: boolean;\n thought: string;\n}\n\nexport interface AIDescribeElementResponse {\n description: string;\n error?: string;\n}\n\nexport interface LocatorValidatorOption {\n centerDistanceThreshold?: number;\n}\n\nexport interface LocateValidatorResult {\n pass: boolean;\n rect: Rect;\n center: [number, number];\n centerDistance?: number;\n}\n\nexport interface AgentDescribeElementAtPointResult {\n prompt: string;\n deepThink: boolean;\n verifyResult?: LocateValidatorResult;\n}\n\n/**\n * context\n */\n\nexport abstract class UIContext<ElementType extends BaseElement = BaseElement> {\n abstract screenshotBase64: string;\n\n abstract tree: ElementTreeNode<ElementType>;\n\n abstract size: Size;\n}\n\n/**\n * insight\n */\n\nexport type CallAIFn = <T>(\n messages: ChatCompletionMessageParam[],\n) => Promise<T>;\n\nexport interface InsightOptions {\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n aiVendorFn?: CallAIFn;\n}\n\nexport type EnsureObject<T> = { [K in keyof T]: any };\n\nexport type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';\n\nexport type InsightExtractParam = string | Record<string, string>;\n\nexport type LocateResultElement = {\n id: string;\n indexId?: number;\n center: [number, number];\n rect: Rect;\n xpaths: string[];\n attributes: {\n nodeType: NodeType;\n [key: string]: string;\n };\n isOrderSensitive?: boolean;\n};\n\nexport interface LocateResult {\n element: LocateResultElement | null;\n rect?: Rect;\n}\n\nexport interface InsightTaskInfo {\n durationMs: number;\n formatResponse?: string;\n rawResponse?: string;\n usage?: AIUsageInfo;\n searchArea?: Rect;\n searchAreaRawResponse?: string;\n searchAreaUsage?: AIUsageInfo;\n}\n\nexport interface DumpMeta {\n sdkVersion: string;\n logTime: number;\n model_name: string;\n model_description?: string;\n}\n\nexport interface ReportDumpWithAttributes {\n dumpString: string;\n attributes?: Record<string, any>;\n}\n\nexport interface InsightDump extends DumpMeta {\n type: 'locate' | 'extract' | 'assert';\n logId: string;\n userQuery: {\n element?: TUserPrompt;\n dataDemand?: InsightExtractParam;\n assertion?: TUserPrompt;\n };\n matchedElement: BaseElement[];\n matchedRect?: Rect;\n deepThink?: boolean;\n data: any;\n assertionPass?: boolean;\n assertionThought?: string;\n taskInfo: InsightTaskInfo;\n error?: string;\n output?: any;\n}\n\nexport type PartialInsightDumpFromSDK = Omit<\n InsightDump,\n 'sdkVersion' | 'logTime' | 'logId' | 'model_name'\n>;\n\nexport type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;\n\n// intermediate variables to optimize the return value by AI\nexport interface LiteUISection {\n name: string;\n description: string;\n sectionCharacteristics: string;\n textIds: string[];\n}\n\nexport type ElementById = (id: string) => BaseElement | null;\n\nexport type InsightAssertionResponse = AIAssertionResponse & {\n usage?: AIUsageInfo;\n};\n\n/**\n * agent\n */\n\nexport type OnTaskStartTip = (tip: string) => Promise<void> | void;\n\nexport interface AgentWaitForOpt {\n checkIntervalMs?: number;\n timeoutMs?: number;\n}\n\nexport interface AgentAssertOpt {\n keepRawResponse?: boolean;\n}\n\n/**\n * planning\n *\n */\n\nexport interface PlanningLocateParam extends DetailedLocateParam {\n id?: string;\n bbox?: [number, number, number, number];\n}\n\nexport interface PlanningAction<ParamType = any> {\n thought?: string;\n type:\n | 'Locate'\n | 'Tap'\n | 'RightClick'\n | 'Hover'\n | 'Drag'\n | 'Input'\n | 'KeyboardPress'\n | 'Scroll'\n | 'Error'\n | 'Assert'\n | 'AssertWithoutThrow'\n | 'Sleep'\n | 'Finished'\n | 'AndroidBackButton'\n | 'AndroidHomeButton'\n | 'AndroidRecentAppsButton'\n | 'AndroidLongPress'\n | 'AndroidPull';\n param: ParamType;\n locate?: PlanningLocateParam | null;\n}\n\nexport interface PlanningAIResponse {\n action?: PlanningAction; // this is the qwen mode\n actions?: PlanningAction[];\n more_actions_needed_by_instruction: boolean;\n log: string;\n sleep?: number;\n error?: string;\n usage?: AIUsageInfo;\n rawResponse?: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n yamlString?: string;\n}\n\nexport type PlanningActionParamTap = null;\nexport type PlanningActionParamHover = null;\nexport type PlanningActionParamRightClick = null;\n\nexport interface PlanningActionParamInputOrKeyPress {\n value: string;\n autoDismissKeyboard?: boolean;\n}\n\nexport interface PlanningActionParamAssert {\n assertion: TUserPrompt;\n}\n\nexport interface PlanningActionParamSleep {\n timeMs: number;\n}\n\nexport interface PlanningActionParamError {\n thought: string;\n}\n\nexport type PlanningActionParamWaitFor = AgentWaitForOpt & {\n assertion: string;\n};\n\nexport interface AndroidLongPressParam {\n duration?: number;\n}\n\nexport interface AndroidPullParam {\n direction: 'up' | 'down';\n distance?: number;\n duration?: number;\n}\n/**\n * misc\n */\n\nexport interface Color {\n name: string;\n hex: string;\n}\n\nexport interface BaseAgentParserOpt {\n selector?: string;\n ignoreMarker?: boolean;\n}\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PuppeteerParserOpt extends BaseAgentParserOpt {}\n\n// eslint-disable-next-line @typescript-eslint/no-empty-interface\nexport interface PlaywrightParserOpt extends BaseAgentParserOpt {}\n\n/*\naction\n*/\nexport interface ExecutionTaskProgressOptions {\n onTaskStart?: (task: ExecutionTask) => Promise<void> | void;\n}\n\nexport interface ExecutionRecorderItem {\n type: 'screenshot';\n ts: number;\n screenshot?: string;\n timing?: string;\n}\n\nexport type ExecutionTaskType =\n | 'Planning'\n | 'Insight'\n | 'Action'\n | 'Assertion'\n | 'Log';\n\nexport interface ExecutorContext {\n task: ExecutionTask;\n element?: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskApply<\n Type extends ExecutionTaskType = any,\n TaskParam = any,\n TaskOutput = any,\n TaskLog = any,\n> {\n type: Type;\n subType?: string;\n param?: TaskParam;\n thought?: string;\n locate?: PlanningLocateParam | null;\n pageContext?: UIContext;\n executor: (\n param: TaskParam,\n context: ExecutorContext,\n ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>\n | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>\n | undefined\n | void;\n}\n\nexport interface ExecutionTaskHitBy {\n from: string;\n context: Record<string, any>;\n}\n\nexport interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {\n output?: TaskOutput;\n log?: TaskLog;\n recorder?: ExecutionRecorderItem[];\n hitBy?: ExecutionTaskHitBy;\n}\n\nexport type ExecutionTask<\n E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<\n any,\n any,\n any\n >,\n> = E &\n ExecutionTaskReturn<\n E extends ExecutionTaskApply<any, any, infer TaskOutput, any>\n ? TaskOutput\n : unknown,\n E extends ExecutionTaskApply<any, any, any, infer TaskLog>\n ? TaskLog\n : unknown\n > & {\n status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';\n error?: Error;\n errorMessage?: string;\n errorStack?: string;\n timing?: {\n start: number;\n end?: number;\n cost?: number;\n };\n usage?: AIUsageInfo;\n };\n\nexport interface ExecutionDump extends DumpMeta {\n name: string;\n description?: string;\n tasks: ExecutionTask[];\n}\n\n/*\ntask - insight-locate\n*/\nexport type ExecutionTaskInsightLocateParam = PlanningLocateParam;\n\nexport interface ExecutionTaskInsightLocateOutput {\n element: LocateResultElement | null;\n}\n\nexport interface ExecutionTaskInsightDumpLog {\n dump?: InsightDump;\n}\n\nexport type ExecutionTaskInsightLocateApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightLocateParam,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightLocate =\n ExecutionTask<ExecutionTaskInsightLocateApply>;\n\n/*\ntask - insight-query\n*/\nexport interface ExecutionTaskInsightQueryParam {\n dataDemand: InsightExtractParam;\n}\n\nexport interface ExecutionTaskInsightQueryOutput {\n data: any;\n}\n\nexport type ExecutionTaskInsightQueryApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightQueryParam,\n any,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightQuery =\n ExecutionTask<ExecutionTaskInsightQueryApply>;\n\n/*\ntask - assertion\n*/\nexport interface ExecutionTaskInsightAssertionParam {\n assertion: string;\n}\n\nexport type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<\n 'Insight',\n ExecutionTaskInsightAssertionParam,\n InsightAssertionResponse,\n ExecutionTaskInsightDumpLog\n>;\n\nexport type ExecutionTaskInsightAssertion =\n ExecutionTask<ExecutionTaskInsightAssertionApply>;\n\n/*\ntask - action (i.e. interact) \n*/\nexport type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<\n 'Action',\n ActionParam,\n void,\n void\n>;\n\nexport type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;\n\n/*\ntask - Log\n*/\n\nexport type ExecutionTaskLogApply<\n LogParam = {\n content: string;\n },\n> = ExecutionTaskApply<'Log', LogParam, void, void>;\n\nexport type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;\n\n/*\ntask - planning\n*/\n\nexport type ExecutionTaskPlanningApply = ExecutionTaskApply<\n 'Planning',\n {\n userInstruction: string;\n log?: string;\n },\n PlanningAIResponse\n>;\n\nexport type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;\n\n/*\nGrouped dump\n*/\nexport interface GroupedActionDump {\n groupName: string;\n groupDescription?: string;\n executions: ExecutionDump[];\n}\n\nexport type PageType =\n | 'puppeteer'\n | 'playwright'\n | 'static'\n | 'chrome-extension-proxy'\n | 'android';\n\nexport interface StreamingCodeGenerationOptions {\n /** Whether to enable streaming output */\n stream?: boolean;\n /** Callback function to handle streaming chunks */\n onChunk?: StreamingCallback;\n /** Callback function to handle streaming completion */\n onComplete?: (finalCode: string) => void;\n /** Callback function to handle streaming errors */\n onError?: (error: Error) => void;\n}\n\nexport type StreamingCallback = (chunk: CodeGenerationChunk) => void;\n\nexport interface CodeGenerationChunk {\n /** The incremental content chunk */\n content: string;\n /** The reasoning content */\n reasoning_content: string;\n /** The accumulated content so far */\n accumulated: string;\n /** Whether this is the final chunk */\n isComplete: boolean;\n /** Token usage information if available */\n usage?: AIUsageInfo;\n}\n\nexport interface StreamingAIResponse {\n /** The final accumulated content */\n content: string;\n /** Token usage information */\n usage?: AIUsageInfo;\n /** Whether the response was streamed */\n isStreamed: boolean;\n}\n\nexport type TMultimodalPrompt = {\n /**\n * Support use image to inspect elements.\n * The \"images\" field is an object that uses image name as key and image url as value.\n * The image url can be a local path, a http link , or a base64 string.\n */\n images?: {\n name: string;\n url: string;\n }[];\n /**\n * By default, the image url in the \"images\" filed starts with `https://` or `http://` will be directly sent to the LLM.\n * In case the images are not accessible to the LLM (One common case is that image url is internal network only.), you can enable this option.\n * Then image will be download and convert to base64 format.\n */\n convertHttpImage2Base64?: boolean;\n};\n\nexport type TUserPrompt =\n | string\n | ({\n prompt: string;\n } & Partial<TMultimodalPrompt>);\n\nexport interface DeviceAction<ParamType = any> {\n name: string;\n description?: string;\n paramSchema?: string;\n paramDescription?: string;\n location?: 'required' | 'optional' | false;\n whatToLocate?: string; // what to locate if location is required or optional\n call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","AIResponseFormat","UIContext"],"mappings":";;;;;;;;;;;;;;;;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;IC0BO,IAAKI,mBAAAA,WAAAA,GAAAA,SAAAA,gBAAgB;;;eAAhBA;;IAwFL,MAAeC;IAMtB"}
|