@midscene/core 0.28.2-beta-20250910065550.0 → 0.28.2-beta-20250910072710.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/dist/es/agent/agent.mjs +27 -17
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +43 -31
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +4 -3
  6. package/dist/es/agent/utils.mjs.map +1 -1
  7. package/dist/es/ai-model/common.mjs +16 -26
  8. package/dist/es/ai-model/common.mjs.map +1 -1
  9. package/dist/es/ai-model/index.mjs +3 -3
  10. package/dist/es/ai-model/inspect.mjs +26 -34
  11. package/dist/es/ai-model/inspect.mjs.map +1 -1
  12. package/dist/es/ai-model/llm-planning.mjs +14 -15
  13. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  14. package/dist/es/ai-model/prompt/common.mjs.map +1 -1
  15. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  16. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  17. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  18. package/dist/es/ai-model/prompt/playwright-generator.mjs +6 -12
  19. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  20. package/dist/es/ai-model/prompt/util.mjs +3 -3
  21. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  22. package/dist/es/ai-model/prompt/yaml-generator.mjs +6 -12
  23. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  24. package/dist/es/ai-model/service-caller/index.mjs +28 -23
  25. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  26. package/dist/es/ai-model/ui-tars-planning.mjs +10 -10
  27. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  28. package/dist/es/insight/index.mjs +20 -26
  29. package/dist/es/insight/index.mjs.map +1 -1
  30. package/dist/es/types.mjs.map +1 -1
  31. package/dist/es/utils.mjs +4 -5
  32. package/dist/es/utils.mjs.map +1 -1
  33. package/dist/lib/agent/agent.js +26 -16
  34. package/dist/lib/agent/agent.js.map +1 -1
  35. package/dist/lib/agent/tasks.js +43 -31
  36. package/dist/lib/agent/tasks.js.map +1 -1
  37. package/dist/lib/agent/utils.js +4 -3
  38. package/dist/lib/agent/utils.js.map +1 -1
  39. package/dist/lib/ai-model/common.js +18 -31
  40. package/dist/lib/ai-model/common.js.map +1 -1
  41. package/dist/lib/ai-model/index.js +12 -15
  42. package/dist/lib/ai-model/inspect.js +24 -32
  43. package/dist/lib/ai-model/inspect.js.map +1 -1
  44. package/dist/lib/ai-model/llm-planning.js +13 -14
  45. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  46. package/dist/lib/ai-model/prompt/common.js.map +1 -1
  47. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  48. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  49. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  50. package/dist/lib/ai-model/prompt/playwright-generator.js +5 -11
  51. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  52. package/dist/lib/ai-model/prompt/util.js +3 -3
  53. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  54. package/dist/lib/ai-model/prompt/yaml-generator.js +5 -11
  55. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  56. package/dist/lib/ai-model/service-caller/index.js +36 -31
  57. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  58. package/dist/lib/ai-model/ui-tars-planning.js +8 -8
  59. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  60. package/dist/lib/insight/index.js +17 -23
  61. package/dist/lib/insight/index.js.map +1 -1
  62. package/dist/lib/types.js.map +1 -1
  63. package/dist/lib/utils.js +3 -4
  64. package/dist/lib/utils.js.map +1 -1
  65. package/dist/types/agent/agent.d.ts +2 -0
  66. package/dist/types/agent/tasks.d.ts +8 -7
  67. package/dist/types/agent/utils.d.ts +3 -1
  68. package/dist/types/ai-model/common.d.ts +7 -11
  69. package/dist/types/ai-model/index.d.ts +2 -2
  70. package/dist/types/ai-model/inspect.d.ts +7 -6
  71. package/dist/types/ai-model/llm-planning.d.ts +2 -2
  72. package/dist/types/ai-model/prompt/common.d.ts +2 -2
  73. package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
  74. package/dist/types/ai-model/prompt/llm-planning.d.ts +3 -3
  75. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
  76. package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -2
  77. package/dist/types/ai-model/prompt/util.d.ts +3 -2
  78. package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -2
  79. package/dist/types/ai-model/service-caller/index.d.ts +5 -5
  80. package/dist/types/ai-model/ui-tars-planning.d.ts +3 -3
  81. package/dist/types/device/index.d.ts +2 -2
  82. package/dist/types/insight/index.d.ts +12 -7
  83. package/dist/types/types.d.ts +0 -9
  84. package/dist/types/utils.d.ts +2 -1
  85. package/package.json +3 -3
@@ -1 +1 @@
1
- {"version":3,"file":"agent/utils.mjs","sources":["webpack://@midscene/core/./src/agent/utils.ts"],"sourcesContent":["import { elementByPositionWithElementInfo } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type {\n BaseElement,\n ElementTreeNode,\n ExecutionDump,\n ExecutionTask,\n ExecutorContext,\n PlanningLocateParam,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/index';\nimport { uploadTestInfoToServer } from '@/utils';\nimport {\n MIDSCENE_REPORT_TAG_NAME,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport {\n generateElementByPosition,\n getNodeFromCacheList,\n} from '@midscene/shared/extractor';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { assert, logMsg, uuid } from '@midscene/shared/utils';\nimport dayjs from 'dayjs';\nimport { debug as cacheDebug } from './task-cache';\nimport type { TaskExecutor } from './tasks';\n\nconst debugProfile = getDebug('web:tool:profile');\n\nexport async function commonContextParser(\n interfaceInstance: AbstractInterface,\n): Promise<UIContext> {\n assert(interfaceInstance, 'interfaceInstance is required');\n\n debugProfile('Getting interface description');\n const description = interfaceInstance.describe?.() || '';\n debugProfile('Interface description end');\n\n debugProfile('Uploading test info to server');\n uploadTestInfoToServer({ testUrl: description });\n debugProfile('UploadTestInfoToServer end');\n\n let screenshotBase64 = await interfaceInstance.screenshotBase64();\n assert(screenshotBase64!, 'screenshotBase64 is required');\n\n const size = await interfaceInstance.size();\n debugProfile(`size: ${size.width}x${size.height} dpr: ${size.dpr}`);\n\n if (size.dpr && size.dpr > 1) {\n debugProfile('Resizing screenshot for high DPR display');\n screenshotBase64 = await resizeImgBase64(screenshotBase64, {\n width: size.width,\n height: size.height,\n });\n debugProfile('ResizeImgBase64 end');\n }\n\n return {\n tree: {\n node: null,\n children: [],\n },\n size,\n screenshotBase64: screenshotBase64!,\n };\n}\n\nexport function getReportFileName(tag = 'web') {\n const reportTagName = globalConfigManager.getEnvConfigValue(\n MIDSCENE_REPORT_TAG_NAME,\n );\n const dateTimeInFileName = dayjs().format('YYYY-MM-DD_HH-mm-ss');\n // ensure uniqueness at the same time\n const uniqueId = uuid().substring(0, 8);\n return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;\n}\n\nexport function printReportMsg(filepath: string) {\n logMsg(`Midscene - report file updated: ${filepath}`);\n}\n\n/**\n * Get the current execution file name\n * @returns The name of the current execution file\n */\nexport function getCurrentExecutionFile(trace?: string): string | false {\n const error = new Error();\n const stackTrace = trace || error.stack;\n const pkgDir = process.cwd() || '';\n if (stackTrace) {\n const stackLines = stackTrace.split('\\n');\n for (const line of stackLines) {\n if (\n line.includes('.spec.') ||\n line.includes('.test.') ||\n line.includes('.ts') ||\n line.includes('.js')\n ) {\n const match = line.match(/(?:at\\s+)?(.*?\\.(?:spec|test)\\.[jt]s)/);\n if (match?.[1]) {\n const targetFileName = match[1]\n .replace(pkgDir, '')\n .trim()\n .replace('at ', '');\n return targetFileName;\n }\n }\n }\n }\n return false;\n}\n\nconst testFileIndex = new Map<string, number>();\n\nexport function generateCacheId(fileName?: string): string {\n let taskFile = fileName || getCurrentExecutionFile();\n if (!taskFile) {\n taskFile = uuid();\n console.warn(\n 'Midscene - using random UUID for cache id. Cache may be invalid.',\n );\n }\n\n if (testFileIndex.has(taskFile)) {\n const currentIndex = testFileIndex.get(taskFile);\n if (currentIndex !== undefined) {\n testFileIndex.set(taskFile, currentIndex + 1);\n }\n } else {\n testFileIndex.set(taskFile, 1);\n }\n return `${taskFile}-${testFileIndex.get(taskFile)}`;\n}\n\nexport function matchElementFromPlan(\n planLocateParam: PlanningLocateParam,\n tree: ElementTreeNode<BaseElement>,\n) {\n if (!planLocateParam) {\n return undefined;\n }\n if (planLocateParam.id) {\n return getNodeFromCacheList(planLocateParam.id);\n }\n\n if (planLocateParam.bbox) {\n const centerPosition = {\n x: Math.floor((planLocateParam.bbox[0] + planLocateParam.bbox[2]) / 2),\n y: Math.floor((planLocateParam.bbox[1] + planLocateParam.bbox[3]) / 2),\n };\n let element = elementByPositionWithElementInfo(tree, centerPosition);\n\n if (!element) {\n element = generateElementByPosition(centerPosition) as BaseElement;\n }\n\n return element;\n }\n\n return undefined;\n}\n\nexport async function matchElementFromCache(\n taskExecutor: TaskExecutor,\n xpaths: string[] | undefined,\n cachePrompt: TUserPrompt,\n cacheable: boolean | undefined,\n) {\n try {\n if (\n xpaths?.length &&\n taskExecutor.taskCache?.isCacheResultUsed &&\n cacheable !== false &&\n (taskExecutor.interface as any).getElementInfoByXpath\n ) {\n // hit cache, use new id\n for (let i = 0; i < xpaths.length; i++) {\n const element = await (\n taskExecutor.interface as any\n ).getElementInfoByXpath(xpaths[i]);\n\n if (element?.id) {\n cacheDebug('cache hit, prompt: %s', cachePrompt);\n cacheDebug(\n 'found a new element with same xpath, xpath: %s, id: %s',\n xpaths[i],\n element?.id,\n );\n return element;\n }\n }\n }\n } catch (error) {\n cacheDebug('get element info by xpath error: ', error);\n }\n}\n\nexport function trimContextByViewport(execution: ExecutionDump) {\n function filterVisibleTree(\n node: ElementTreeNode<BaseElement>,\n ): ElementTreeNode<BaseElement> | null {\n if (!node) return null;\n\n // recursively process all children\n const filteredChildren = Array.isArray(node.children)\n ? (node.children\n .map(filterVisibleTree)\n .filter((child) => child !== null) as ElementTreeNode<BaseElement>[])\n : [];\n\n // if the current node is visible, keep it and the filtered children\n if (node.node && node.node.isVisible === true) {\n return {\n ...node,\n children: filteredChildren,\n };\n }\n\n // if the current node is invisible, but has visible children, create an empty node to include these children\n if (filteredChildren.length > 0) {\n return {\n node: null,\n children: filteredChildren,\n };\n }\n\n // if the current node is invisible and has no visible children, return null\n return null;\n }\n\n return {\n ...execution,\n tasks: Array.isArray(execution.tasks)\n ? execution.tasks.map((task: ExecutionTask) => {\n const newTask = { ...task };\n if (task.uiContext?.tree) {\n newTask.uiContext = {\n ...task.uiContext,\n tree: filterVisibleTree(task.uiContext.tree) || {\n node: null,\n children: [],\n },\n };\n }\n return newTask;\n })\n : execution.tasks,\n };\n}\n\ndeclare const __VERSION__: string | undefined;\n\nexport const getMidsceneVersion = (): string => {\n if (typeof __VERSION__ !== 'undefined') {\n return __VERSION__;\n } else if (\n process.env.__VERSION__ &&\n process.env.__VERSION__ !== 'undefined'\n ) {\n return process.env.__VERSION__;\n }\n throw new Error('__VERSION__ inject failed during build');\n};\n\nexport const parsePrompt = (\n prompt: TUserPrompt,\n): {\n textPrompt: string;\n multimodalPrompt?: TMultimodalPrompt;\n} => {\n if (typeof prompt === 'string') {\n return {\n textPrompt: prompt,\n multimodalPrompt: undefined,\n };\n }\n return {\n textPrompt: prompt.prompt,\n multimodalPrompt: prompt.images\n ? {\n images: prompt.images,\n convertHttpImage2Base64: !!prompt.convertHttpImage2Base64,\n }\n : undefined,\n };\n};\n"],"names":["debugProfile","getDebug","commonContextParser","interfaceInstance","assert","description","uploadTestInfoToServer","screenshotBase64","size","resizeImgBase64","getReportFileName","tag","reportTagName","globalConfigManager","MIDSCENE_REPORT_TAG_NAME","dateTimeInFileName","dayjs","uniqueId","uuid","printReportMsg","filepath","logMsg","getCurrentExecutionFile","trace","error","Error","stackTrace","pkgDir","process","stackLines","line","match","targetFileName","testFileIndex","Map","generateCacheId","fileName","taskFile","console","currentIndex","undefined","matchElementFromPlan","planLocateParam","tree","getNodeFromCacheList","centerPosition","Math","element","elementByPositionWithElementInfo","generateElementByPosition","matchElementFromCache","taskExecutor","xpaths","cachePrompt","cacheable","_taskExecutor_taskCache","i","cacheDebug","trimContextByViewport","execution","filterVisibleTree","node","filteredChildren","Array","child","task","_task_uiContext","newTask","getMidsceneVersion","__VERSION__","parsePrompt","prompt"],"mappings":";;;;;;;;;AA8BA,MAAMA,eAAeC,SAAS;AAEvB,eAAeC,oBACpBC,iBAAoC;QAKhBA;IAHpBC,OAAOD,mBAAmB;IAE1BH,aAAa;IACb,MAAMK,cAAcF,AAAAA,SAAAA,CAAAA,8BAAAA,kBAAkB,QAAQ,AAAD,IAAzBA,KAAAA,IAAAA,4BAAAA,IAAAA,CAAAA,kBAAiB,KAAiB;IACtDH,aAAa;IAEbA,aAAa;IACbM,uBAAuB;QAAE,SAASD;IAAY;IAC9CL,aAAa;IAEb,IAAIO,mBAAmB,MAAMJ,kBAAkB,gBAAgB;IAC/DC,OAAOG,kBAAmB;IAE1B,MAAMC,OAAO,MAAML,kBAAkB,IAAI;IACzCH,aAAa,CAAC,MAAM,EAAEQ,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,MAAM,EAAEA,KAAK,GAAG,EAAE;IAElE,IAAIA,KAAK,GAAG,IAAIA,KAAK,GAAG,GAAG,GAAG;QAC5BR,aAAa;QACbO,mBAAmB,MAAME,gBAAgBF,kBAAkB;YACzD,OAAOC,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACAR,aAAa;IACf;IAEA,OAAO;QACL,MAAM;YACJ,MAAM;YACN,UAAU,EAAE;QACd;QACAQ;QACA,kBAAkBD;IACpB;AACF;AAEO,SAASG,kBAAkBC,MAAM,KAAK;IAC3C,MAAMC,gBAAgBC,oBAAoB,iBAAiB,CACzDC;IAEF,MAAMC,qBAAqBC,QAAQ,MAAM,CAAC;IAE1C,MAAMC,WAAWC,OAAO,SAAS,CAAC,GAAG;IACrC,OAAO,GAAGN,iBAAiBD,IAAI,CAAC,EAAEI,mBAAmB,CAAC,EAAEE,UAAU;AACpE;AAEO,SAASE,eAAeC,QAAgB;IAC7CC,OAAO,CAAC,gCAAgC,EAAED,UAAU;AACtD;AAMO,SAASE,wBAAwBC,KAAc;IACpD,MAAMC,QAAQ,IAAIC;IAClB,MAAMC,aAAaH,SAASC,MAAM,KAAK;IACvC,MAAMG,SAASC,QAAQ,GAAG,MAAM;IAChC,IAAIF,YAAY;QACd,MAAMG,aAAaH,WAAW,KAAK,CAAC;QACpC,KAAK,MAAMI,QAAQD,WACjB,IACEC,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,UACdA,KAAK,QAAQ,CAAC,QACd;YACA,MAAMC,QAAQD,KAAK,KAAK,CAAC;YACzB,IAAIC,QAAAA,QAAAA,KAAAA,IAAAA,KAAO,CAAC,EAAE,EAAE;gBACd,MAAMC,iBAAiBD,KAAK,CAAC,EAAE,CAC5B,OAAO,CAACJ,QAAQ,IAChB,IAAI,GACJ,OAAO,CAAC,OAAO;gBAClB,OAAOK;YACT;QACF;IAEJ;IACA,OAAO;AACT;AAEA,MAAMC,gBAAgB,IAAIC;AAEnB,SAASC,gBAAgBC,QAAiB;IAC/C,IAAIC,WAAWD,YAAYd;IAC3B,IAAI,CAACe,UAAU;QACbA,WAAWnB;QACXoB,QAAQ,IAAI,CACV;IAEJ;IAEA,IAAIL,cAAc,GAAG,CAACI,WAAW;QAC/B,MAAME,eAAeN,cAAc,GAAG,CAACI;QACvC,IAAIE,AAAiBC,WAAjBD,cACFN,cAAc,GAAG,CAACI,UAAUE,eAAe;IAE/C,OACEN,cAAc,GAAG,CAACI,UAAU;IAE9B,OAAO,GAAGA,SAAS,CAAC,EAAEJ,cAAc,GAAG,CAACI,WAAW;AACrD;AAEO,SAASI,qBACdC,eAAoC,EACpCC,IAAkC;IAElC,IAAI,CAACD,iBACH;IAEF,IAAIA,gBAAgB,EAAE,EACpB,OAAOE,qBAAqBF,gBAAgB,EAAE;IAGhD,IAAIA,gBAAgB,IAAI,EAAE;QACxB,MAAMG,iBAAiB;YACrB,GAAGC,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAC,IAAK;YACpE,GAAGI,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAC,IAAK;QACtE;QACA,IAAIK,UAAUC,iCAAiCL,MAAME;QAErD,IAAI,CAACE,SACHA,UAAUE,0BAA0BJ;QAGtC,OAAOE;IACT;AAGF;AAEO,eAAeG,sBACpBC,YAA0B,EAC1BC,MAA4B,EAC5BC,WAAwB,EACxBC,SAA8B;IAE9B,IAAI;YAGAC;QAFF,IACEH,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,KAAC,SACdG,CAAAA,0BAAAA,aAAa,SAAS,AAAD,IAArBA,KAAAA,IAAAA,wBAAwB,iBAAiB,AAAD,KACxCD,AAAc,UAAdA,aACCH,aAAa,SAAS,CAAS,qBAAqB,EAGrD,IAAK,IAAIK,IAAI,GAAGA,IAAIJ,OAAO,MAAM,EAAEI,IAAK;YACtC,MAAMT,UAAU,MACdI,aAAa,SAAS,CACtB,qBAAqB,CAACC,MAAM,CAACI,EAAE;YAEjC,IAAIT,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,EAAE,EAAE;gBACfU,MAAW,yBAAyBJ;gBACpCI,MACE,0DACAL,MAAM,CAACI,EAAE,EACTT,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,EAAE;gBAEb,OAAOA;YACT;QACF;IAEJ,EAAE,OAAOvB,OAAO;QACdiC,MAAW,qCAAqCjC;IAClD;AACF;AAEO,SAASkC,sBAAsBC,SAAwB;IAC5D,SAASC,kBACPC,IAAkC;QAElC,IAAI,CAACA,MAAM,OAAO;QAGlB,MAAMC,mBAAmBC,MAAM,OAAO,CAACF,KAAK,QAAQ,IAC/CA,KAAK,QAAQ,CACX,GAAG,CAACD,mBACJ,MAAM,CAAC,CAACI,QAAUA,AAAU,SAAVA,SACrB,EAAE;QAGN,IAAIH,KAAK,IAAI,IAAIA,AAAwB,SAAxBA,KAAK,IAAI,CAAC,SAAS,EAClC,OAAO;YACL,GAAGA,IAAI;YACP,UAAUC;QACZ;QAIF,IAAIA,iBAAiB,MAAM,GAAG,GAC5B,OAAO;YACL,MAAM;YACN,UAAUA;QACZ;QAIF,OAAO;IACT;IAEA,OAAO;QACL,GAAGH,SAAS;QACZ,OAAOI,MAAM,OAAO,CAACJ,UAAU,KAAK,IAChCA,UAAU,KAAK,CAAC,GAAG,CAAC,CAACM;gBAEfC;YADJ,MAAMC,UAAU;gBAAE,GAAGF,IAAI;YAAC;YAC1B,IAAI,QAAAC,CAAAA,kBAAAA,KAAK,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,IAAI,EACtBC,QAAQ,SAAS,GAAG;gBAClB,GAAGF,KAAK,SAAS;gBACjB,MAAML,kBAAkBK,KAAK,SAAS,CAAC,IAAI,KAAK;oBAC9C,MAAM;oBACN,UAAU,EAAE;gBACd;YACF;YAEF,OAAOE;QACT,KACAR,UAAU,KAAK;IACrB;AACF;AAIO,MAAMS,qBAAqB,IAEvBC;AAUJ,MAAMC,cAAc,CACzBC;IAKA,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAO;QACL,YAAYA;QACZ,kBAAkB/B;IACpB;IAEF,OAAO;QACL,YAAY+B,OAAO,MAAM;QACzB,kBAAkBA,OAAO,MAAM,GAC3B;YACE,QAAQA,OAAO,MAAM;YACrB,yBAAyB,CAAC,CAACA,OAAO,uBAAuB;QAC3D,IACA/B;IACN;AACF"}
1
+ {"version":3,"file":"agent/utils.mjs","sources":["webpack://@midscene/core/./src/agent/utils.ts"],"sourcesContent":["import { elementByPositionWithElementInfo } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type {\n BaseElement,\n ElementTreeNode,\n ExecutionDump,\n ExecutionTask,\n ExecutorContext,\n PlanningLocateParam,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/index';\nimport { uploadTestInfoToServer } from '@/utils';\nimport {\n MIDSCENE_REPORT_TAG_NAME,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport {\n generateElementByPosition,\n getNodeFromCacheList,\n} from '@midscene/shared/extractor';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { assert, logMsg, uuid } from '@midscene/shared/utils';\nimport dayjs from 'dayjs';\nimport { debug as cacheDebug } from './task-cache';\nimport type { TaskExecutor } from './tasks';\n\nconst debugProfile = getDebug('web:tool:profile');\n\nexport async function commonContextParser(\n interfaceInstance: AbstractInterface,\n _opt: { uploadServerUrl?: string },\n): Promise<UIContext> {\n assert(interfaceInstance, 'interfaceInstance is required');\n\n debugProfile('Getting interface description');\n const description = interfaceInstance.describe?.() || '';\n debugProfile('Interface description end');\n\n debugProfile('Uploading test info to server');\n uploadTestInfoToServer({\n testUrl: description,\n serverUrl: _opt.uploadServerUrl,\n });\n debugProfile('UploadTestInfoToServer end');\n\n let screenshotBase64 = await interfaceInstance.screenshotBase64();\n assert(screenshotBase64!, 'screenshotBase64 is required');\n\n const size = await interfaceInstance.size();\n debugProfile(`size: ${size.width}x${size.height} dpr: ${size.dpr}`);\n\n if (size.dpr && size.dpr > 1) {\n debugProfile('Resizing screenshot for high DPR display');\n screenshotBase64 = await resizeImgBase64(screenshotBase64, {\n width: size.width,\n height: size.height,\n });\n debugProfile('ResizeImgBase64 end');\n }\n\n return {\n tree: {\n node: null,\n children: [],\n },\n size,\n screenshotBase64: screenshotBase64!,\n };\n}\n\nexport function getReportFileName(tag = 'web') {\n const reportTagName = globalConfigManager.getEnvConfigValue(\n MIDSCENE_REPORT_TAG_NAME,\n );\n const dateTimeInFileName = dayjs().format('YYYY-MM-DD_HH-mm-ss');\n // ensure uniqueness at the same time\n const uniqueId = uuid().substring(0, 8);\n return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;\n}\n\nexport function printReportMsg(filepath: string) {\n logMsg(`Midscene - report file updated: ${filepath}`);\n}\n\n/**\n * Get the current execution file name\n * @returns The name of the current execution file\n */\nexport function getCurrentExecutionFile(trace?: string): string | false {\n const error = new Error();\n const stackTrace = trace || error.stack;\n const pkgDir = process.cwd() || '';\n if (stackTrace) {\n const stackLines = stackTrace.split('\\n');\n for (const line of stackLines) {\n if (\n line.includes('.spec.') ||\n line.includes('.test.') ||\n line.includes('.ts') ||\n line.includes('.js')\n ) {\n const match = line.match(/(?:at\\s+)?(.*?\\.(?:spec|test)\\.[jt]s)/);\n if (match?.[1]) {\n const targetFileName = match[1]\n .replace(pkgDir, '')\n .trim()\n .replace('at ', '');\n return targetFileName;\n }\n }\n }\n }\n return false;\n}\n\nconst testFileIndex = new Map<string, number>();\n\nexport function generateCacheId(fileName?: string): string {\n let taskFile = fileName || getCurrentExecutionFile();\n if (!taskFile) {\n taskFile = uuid();\n console.warn(\n 'Midscene - using random UUID for cache id. Cache may be invalid.',\n );\n }\n\n if (testFileIndex.has(taskFile)) {\n const currentIndex = testFileIndex.get(taskFile);\n if (currentIndex !== undefined) {\n testFileIndex.set(taskFile, currentIndex + 1);\n }\n } else {\n testFileIndex.set(taskFile, 1);\n }\n return `${taskFile}-${testFileIndex.get(taskFile)}`;\n}\n\nexport function matchElementFromPlan(\n planLocateParam: PlanningLocateParam,\n tree: ElementTreeNode<BaseElement>,\n) {\n if (!planLocateParam) {\n return undefined;\n }\n if (planLocateParam.id) {\n return getNodeFromCacheList(planLocateParam.id);\n }\n\n if (planLocateParam.bbox) {\n const centerPosition = {\n x: Math.floor((planLocateParam.bbox[0] + planLocateParam.bbox[2]) / 2),\n y: Math.floor((planLocateParam.bbox[1] + planLocateParam.bbox[3]) / 2),\n };\n let element = elementByPositionWithElementInfo(tree, centerPosition);\n\n if (!element) {\n element = generateElementByPosition(centerPosition) as BaseElement;\n }\n\n return element;\n }\n\n return undefined;\n}\n\nexport async function matchElementFromCache(\n taskExecutor: TaskExecutor,\n xpaths: string[] | undefined,\n cachePrompt: TUserPrompt,\n cacheable: boolean | undefined,\n) {\n try {\n if (\n xpaths?.length &&\n taskExecutor.taskCache?.isCacheResultUsed &&\n cacheable !== false &&\n (taskExecutor.interface as any).getElementInfoByXpath\n ) {\n // hit cache, use new id\n for (let i = 0; i < xpaths.length; i++) {\n const element = await (\n taskExecutor.interface as any\n ).getElementInfoByXpath(xpaths[i]);\n\n if (element?.id) {\n cacheDebug('cache hit, prompt: %s', cachePrompt);\n cacheDebug(\n 'found a new element with same xpath, xpath: %s, id: %s',\n xpaths[i],\n element?.id,\n );\n return element;\n }\n }\n }\n } catch (error) {\n cacheDebug('get element info by xpath error: ', error);\n }\n}\n\nexport function trimContextByViewport(execution: ExecutionDump) {\n function filterVisibleTree(\n node: ElementTreeNode<BaseElement>,\n ): ElementTreeNode<BaseElement> | null {\n if (!node) return null;\n\n // recursively process all children\n const filteredChildren = Array.isArray(node.children)\n ? (node.children\n .map(filterVisibleTree)\n .filter((child) => child !== null) as ElementTreeNode<BaseElement>[])\n : [];\n\n // if the current node is visible, keep it and the filtered children\n if (node.node && node.node.isVisible === true) {\n return {\n ...node,\n children: filteredChildren,\n };\n }\n\n // if the current node is invisible, but has visible children, create an empty node to include these children\n if (filteredChildren.length > 0) {\n return {\n node: null,\n children: filteredChildren,\n };\n }\n\n // if the current node is invisible and has no visible children, return null\n return null;\n }\n\n return {\n ...execution,\n tasks: Array.isArray(execution.tasks)\n ? execution.tasks.map((task: ExecutionTask) => {\n const newTask = { ...task };\n if (task.uiContext?.tree) {\n newTask.uiContext = {\n ...task.uiContext,\n tree: filterVisibleTree(task.uiContext.tree) || {\n node: null,\n children: [],\n },\n };\n }\n return newTask;\n })\n : execution.tasks,\n };\n}\n\ndeclare const __VERSION__: string | undefined;\n\nexport const getMidsceneVersion = (): string => {\n if (typeof __VERSION__ !== 'undefined') {\n return __VERSION__;\n } else if (\n process.env.__VERSION__ &&\n process.env.__VERSION__ !== 'undefined'\n ) {\n return process.env.__VERSION__;\n }\n throw new Error('__VERSION__ inject failed during build');\n};\n\nexport const parsePrompt = (\n prompt: TUserPrompt,\n): {\n textPrompt: string;\n multimodalPrompt?: TMultimodalPrompt;\n} => {\n if (typeof prompt === 'string') {\n return {\n textPrompt: prompt,\n multimodalPrompt: undefined,\n };\n }\n return {\n textPrompt: prompt.prompt,\n multimodalPrompt: prompt.images\n ? {\n images: prompt.images,\n convertHttpImage2Base64: !!prompt.convertHttpImage2Base64,\n }\n : undefined,\n };\n};\n"],"names":["debugProfile","getDebug","commonContextParser","interfaceInstance","_opt","assert","description","uploadTestInfoToServer","screenshotBase64","size","resizeImgBase64","getReportFileName","tag","reportTagName","globalConfigManager","MIDSCENE_REPORT_TAG_NAME","dateTimeInFileName","dayjs","uniqueId","uuid","printReportMsg","filepath","logMsg","getCurrentExecutionFile","trace","error","Error","stackTrace","pkgDir","process","stackLines","line","match","targetFileName","testFileIndex","Map","generateCacheId","fileName","taskFile","console","currentIndex","undefined","matchElementFromPlan","planLocateParam","tree","getNodeFromCacheList","centerPosition","Math","element","elementByPositionWithElementInfo","generateElementByPosition","matchElementFromCache","taskExecutor","xpaths","cachePrompt","cacheable","_taskExecutor_taskCache","i","cacheDebug","trimContextByViewport","execution","filterVisibleTree","node","filteredChildren","Array","child","task","_task_uiContext","newTask","getMidsceneVersion","__VERSION__","parsePrompt","prompt"],"mappings":";;;;;;;;;AA8BA,MAAMA,eAAeC,SAAS;AAEvB,eAAeC,oBACpBC,iBAAoC,EACpCC,IAAkC;QAKdD;IAHpBE,OAAOF,mBAAmB;IAE1BH,aAAa;IACb,MAAMM,cAAcH,AAAAA,SAAAA,CAAAA,8BAAAA,kBAAkB,QAAQ,AAAD,IAAzBA,KAAAA,IAAAA,4BAAAA,IAAAA,CAAAA,kBAAiB,KAAiB;IACtDH,aAAa;IAEbA,aAAa;IACbO,uBAAuB;QACrB,SAASD;QACT,WAAWF,KAAK,eAAe;IACjC;IACAJ,aAAa;IAEb,IAAIQ,mBAAmB,MAAML,kBAAkB,gBAAgB;IAC/DE,OAAOG,kBAAmB;IAE1B,MAAMC,OAAO,MAAMN,kBAAkB,IAAI;IACzCH,aAAa,CAAC,MAAM,EAAES,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,MAAM,EAAEA,KAAK,GAAG,EAAE;IAElE,IAAIA,KAAK,GAAG,IAAIA,KAAK,GAAG,GAAG,GAAG;QAC5BT,aAAa;QACbQ,mBAAmB,MAAME,gBAAgBF,kBAAkB;YACzD,OAAOC,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACAT,aAAa;IACf;IAEA,OAAO;QACL,MAAM;YACJ,MAAM;YACN,UAAU,EAAE;QACd;QACAS;QACA,kBAAkBD;IACpB;AACF;AAEO,SAASG,kBAAkBC,MAAM,KAAK;IAC3C,MAAMC,gBAAgBC,oBAAoB,iBAAiB,CACzDC;IAEF,MAAMC,qBAAqBC,QAAQ,MAAM,CAAC;IAE1C,MAAMC,WAAWC,OAAO,SAAS,CAAC,GAAG;IACrC,OAAO,GAAGN,iBAAiBD,IAAI,CAAC,EAAEI,mBAAmB,CAAC,EAAEE,UAAU;AACpE;AAEO,SAASE,eAAeC,QAAgB;IAC7CC,OAAO,CAAC,gCAAgC,EAAED,UAAU;AACtD;AAMO,SAASE,wBAAwBC,KAAc;IACpD,MAAMC,QAAQ,IAAIC;IAClB,MAAMC,aAAaH,SAASC,MAAM,KAAK;IACvC,MAAMG,SAASC,QAAQ,GAAG,MAAM;IAChC,IAAIF,YAAY;QACd,MAAMG,aAAaH,WAAW,KAAK,CAAC;QACpC,KAAK,MAAMI,QAAQD,WACjB,IACEC,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,aACdA,KAAK,QAAQ,CAAC,UACdA,KAAK,QAAQ,CAAC,QACd;YACA,MAAMC,QAAQD,KAAK,KAAK,CAAC;YACzB,IAAIC,QAAAA,QAAAA,KAAAA,IAAAA,KAAO,CAAC,EAAE,EAAE;gBACd,MAAMC,iBAAiBD,KAAK,CAAC,EAAE,CAC5B,OAAO,CAACJ,QAAQ,IAChB,IAAI,GACJ,OAAO,CAAC,OAAO;gBAClB,OAAOK;YACT;QACF;IAEJ;IACA,OAAO;AACT;AAEA,MAAMC,gBAAgB,IAAIC;AAEnB,SAASC,gBAAgBC,QAAiB;IAC/C,IAAIC,WAAWD,YAAYd;IAC3B,IAAI,CAACe,UAAU;QACbA,WAAWnB;QACXoB,QAAQ,IAAI,CACV;IAEJ;IAEA,IAAIL,cAAc,GAAG,CAACI,WAAW;QAC/B,MAAME,eAAeN,cAAc,GAAG,CAACI;QACvC,IAAIE,AAAiBC,WAAjBD,cACFN,cAAc,GAAG,CAACI,UAAUE,eAAe;IAE/C,OACEN,cAAc,GAAG,CAACI,UAAU;IAE9B,OAAO,GAAGA,SAAS,CAAC,EAAEJ,cAAc,GAAG,CAACI,WAAW;AACrD;AAEO,SAASI,qBACdC,eAAoC,EACpCC,IAAkC;IAElC,IAAI,CAACD,iBACH;IAEF,IAAIA,gBAAgB,EAAE,EACpB,OAAOE,qBAAqBF,gBAAgB,EAAE;IAGhD,IAAIA,gBAAgB,IAAI,EAAE;QACxB,MAAMG,iBAAiB;YACrB,GAAGC,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAC,IAAK;YACpE,GAAGI,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,gBAAgB,IAAI,CAAC,EAAE,GAAGA,gBAAgB,IAAI,CAAC,EAAC,IAAK;QACtE;QACA,IAAIK,UAAUC,iCAAiCL,MAAME;QAErD,IAAI,CAACE,SACHA,UAAUE,0BAA0BJ;QAGtC,OAAOE;IACT;AAGF;AAEO,eAAeG,sBACpBC,YAA0B,EAC1BC,MAA4B,EAC5BC,WAAwB,EACxBC,SAA8B;IAE9B,IAAI;YAGAC;QAFF,IACEH,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,KAAC,SACdG,CAAAA,0BAAAA,aAAa,SAAS,AAAD,IAArBA,KAAAA,IAAAA,wBAAwB,iBAAiB,AAAD,KACxCD,AAAc,UAAdA,aACCH,aAAa,SAAS,CAAS,qBAAqB,EAGrD,IAAK,IAAIK,IAAI,GAAGA,IAAIJ,OAAO,MAAM,EAAEI,IAAK;YACtC,MAAMT,UAAU,MACdI,aAAa,SAAS,CACtB,qBAAqB,CAACC,MAAM,CAACI,EAAE;YAEjC,IAAIT,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,EAAE,EAAE;gBACfU,MAAW,yBAAyBJ;gBACpCI,MACE,0DACAL,MAAM,CAACI,EAAE,EACTT,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,EAAE;gBAEb,OAAOA;YACT;QACF;IAEJ,EAAE,OAAOvB,OAAO;QACdiC,MAAW,qCAAqCjC;IAClD;AACF;AAEO,SAASkC,sBAAsBC,SAAwB;IAC5D,SAASC,kBACPC,IAAkC;QAElC,IAAI,CAACA,MAAM,OAAO;QAGlB,MAAMC,mBAAmBC,MAAM,OAAO,CAACF,KAAK,QAAQ,IAC/CA,KAAK,QAAQ,CACX,GAAG,CAACD,mBACJ,MAAM,CAAC,CAACI,QAAUA,AAAU,SAAVA,SACrB,EAAE;QAGN,IAAIH,KAAK,IAAI,IAAIA,AAAwB,SAAxBA,KAAK,IAAI,CAAC,SAAS,EAClC,OAAO;YACL,GAAGA,IAAI;YACP,UAAUC;QACZ;QAIF,IAAIA,iBAAiB,MAAM,GAAG,GAC5B,OAAO;YACL,MAAM;YACN,UAAUA;QACZ;QAIF,OAAO;IACT;IAEA,OAAO;QACL,GAAGH,SAAS;QACZ,OAAOI,MAAM,OAAO,CAACJ,UAAU,KAAK,IAChCA,UAAU,KAAK,CAAC,GAAG,CAAC,CAACM;gBAEfC;YADJ,MAAMC,UAAU;gBAAE,GAAGF,IAAI;YAAC;YAC1B,IAAI,QAAAC,CAAAA,kBAAAA,KAAK,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,IAAI,EACtBC,QAAQ,SAAS,GAAG;gBAClB,GAAGF,KAAK,SAAS;gBACjB,MAAML,kBAAkBK,KAAK,SAAS,CAAC,IAAI,KAAK;oBAC9C,MAAM;oBACN,UAAU,EAAE;gBACd;YACF;YAEF,OAAOE;QACT,KACAR,UAAU,KAAK;IACrB;AACF;AAIO,MAAMS,qBAAqB,IAEvBC;AAUJ,MAAMC,cAAc,CACzBC;IAKA,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAO;QACL,YAAYA;QACZ,kBAAkB/B;IACpB;IAEF,OAAO;QACL,YAAY+B,OAAO,MAAM;QACzB,kBAAkBA,OAAO,MAAM,GAC3B;YACE,QAAQA,OAAO,MAAM;YACrB,yBAAyB,CAAC,CAACA,OAAO,uBAAuB;QAC3D,IACA/B;IACN;AACF"}
@@ -1,7 +1,5 @@
1
1
  import { assert } from "@midscene/shared/utils";
2
- import { callToGetJSONObject } from "./service-caller/index.mjs";
3
2
  import { NodeType } from "@midscene/shared/constants";
4
- import { getModelName, vlLocateMode } from "@midscene/shared/env";
5
3
  import { treeToList } from "@midscene/shared/extractor";
6
4
  import { compositeElementInfoImg } from "@midscene/shared/img";
7
5
  import { getDebug } from "@midscene/shared/logger";
@@ -14,21 +12,14 @@ var common_AIActionType = /*#__PURE__*/ function(AIActionType) {
14
12
  AIActionType[AIActionType["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
15
13
  return AIActionType;
16
14
  }({});
17
- async function callAiFn(msgs, AIActionTypeValue, modelPreferences) {
18
- const jsonObject = await callToGetJSONObject(msgs, AIActionTypeValue, modelPreferences);
19
- return {
20
- content: jsonObject.content,
21
- usage: jsonObject.usage
22
- };
23
- }
24
15
  const defaultBboxSize = 20;
25
16
  const debugInspectUtils = getDebug('ai:common');
26
- function fillBboxParam(locate, width, height, modelPreferences) {
17
+ function fillBboxParam(locate, width, height, vlMode) {
27
18
  if (locate.bbox_2d && !(null == locate ? void 0 : locate.bbox)) {
28
19
  locate.bbox = locate.bbox_2d;
29
20
  delete locate.bbox_2d;
30
21
  }
31
- if (null == locate ? void 0 : locate.bbox) locate.bbox = adaptBbox(locate.bbox, width, height, modelPreferences);
22
+ if (null == locate ? void 0 : locate.bbox) locate.bbox = adaptBbox(locate.bbox, width, height, vlMode);
32
23
  return locate;
33
24
  }
34
25
  function adaptQwenBbox(bbox) {
@@ -90,9 +81,9 @@ function adaptDoubaoBbox(bbox, width, height) {
90
81
  const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;
91
82
  throw new Error(msg);
92
83
  }
93
- function adaptBbox(bbox, width, height, modelPreferences) {
94
- if ('doubao-vision' === vlLocateMode(modelPreferences) || 'vlm-ui-tars' === vlLocateMode(modelPreferences)) return adaptDoubaoBbox(bbox, width, height);
95
- if ('gemini' === vlLocateMode(modelPreferences)) return adaptGeminiBbox(bbox, width, height);
84
+ function adaptBbox(bbox, width, height, vlMode) {
85
+ if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) return adaptDoubaoBbox(bbox, width, height);
86
+ if ('gemini' === vlMode) return adaptGeminiBbox(bbox, width, height);
96
87
  return adaptQwenBbox(bbox);
97
88
  }
98
89
  function adaptGeminiBbox(bbox, width, height) {
@@ -107,11 +98,11 @@ function adaptGeminiBbox(bbox, width, height) {
107
98
  bottom
108
99
  ];
109
100
  }
110
- function adaptBboxToRect(bbox, width, height, modelPreferences, offsetX = 0, offsetY = 0) {
101
+ function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0, vlMode) {
111
102
  debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);
112
- const [left, top, right, bottom] = adaptBbox(bbox, width, height, modelPreferences);
113
- const rectLeft = left;
114
- const rectTop = top;
103
+ const [left, top, right, bottom] = adaptBbox(bbox, width, height, vlMode);
104
+ const rectLeft = left + offsetX;
105
+ const rectTop = top + offsetY;
115
106
  let rectWidth = right - left;
116
107
  let rectHeight = bottom - top;
117
108
  if (rectLeft + rectWidth > width) rectWidth = width - rectLeft;
@@ -119,8 +110,8 @@ function adaptBboxToRect(bbox, width, height, modelPreferences, offsetX = 0, off
119
110
  rectWidth = Math.max(1, rectWidth);
120
111
  rectHeight = Math.max(1, rectHeight);
121
112
  const rect = {
122
- left: rectLeft + offsetX,
123
- top: rectTop + offsetY,
113
+ left: rectLeft,
114
+ top: rectTop,
124
115
  width: rectWidth,
125
116
  height: rectHeight
126
117
  };
@@ -128,10 +119,9 @@ function adaptBboxToRect(bbox, width, height, modelPreferences, offsetX = 0, off
128
119
  return rect;
129
120
  }
130
121
  let warned = false;
131
- function warnGPT4oSizeLimit(size, modelPreferences) {
132
- var _getModelName;
122
+ function warnGPT4oSizeLimit(size, modelName) {
133
123
  if (warned) return;
134
- if (null == (_getModelName = getModelName(modelPreferences)) ? void 0 : _getModelName.toLowerCase().includes('gpt-4o')) {
124
+ if (modelName.toLowerCase().includes('gpt-4o')) {
135
125
  const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your interface to a smaller resolution. Otherwise, the result may be inaccurate.`;
136
126
  if (Math.max(size.width, size.height) > 2000 || Math.min(size.width, size.height) > 768) {
137
127
  console.warn(warningMsg);
@@ -154,8 +144,8 @@ function mergeRects(rects) {
154
144
  height: maxBottom - minTop
155
145
  };
156
146
  }
157
- function expandSearchArea(rect, screenSize, modelPreferences) {
158
- const minEdgeSize = 'doubao-vision' === vlLocateMode(modelPreferences) ? 500 : 300;
147
+ function expandSearchArea(rect, screenSize, vlMode) {
148
+ const minEdgeSize = 'doubao-vision' === vlMode ? 500 : 300;
159
149
  const defaultPadding = 160;
160
150
  const paddingSizeHorizontal = rect.width < minEdgeSize ? Math.ceil((minEdgeSize - rect.width) / 2) : defaultPadding;
161
151
  const paddingSizeVertical = rect.height < minEdgeSize ? Math.ceil((minEdgeSize - rect.height) / 2) : defaultPadding;
@@ -309,6 +299,6 @@ const loadActionParam = (jsonObject, zodSchema)=>{
309
299
  }
310
300
  return result;
311
301
  };
312
- export { common_AIActionType as AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptQwenBbox, buildYamlFlowFromPlans, callAiFn, dumpActionParam, dumpMidsceneLocatorField, expandSearchArea, fillBboxParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, ifMidsceneLocatorField, loadActionParam, markupImageForLLM, mergeRects, warnGPT4oSizeLimit };
302
+ export { common_AIActionType as AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptQwenBbox, buildYamlFlowFromPlans, dumpActionParam, dumpMidsceneLocatorField, expandSearchArea, fillBboxParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, ifMidsceneLocatorField, loadActionParam, markupImageForLLM, mergeRects, warnGPT4oSizeLimit };
313
303
 
314
304
  //# sourceMappingURL=common.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/common.mjs","sources":["webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport { callToGetJSONObject } from './service-caller/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport {\n type IModelPreferences,\n getModelName,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nexport async function callAiFn<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelPreferences: IModelPreferences,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const jsonObject = await callToGetJSONObject<T>(\n msgs,\n AIActionTypeValue,\n modelPreferences,\n );\n\n return {\n content: jsonObject.content,\n usage: jsonObject.usage,\n };\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n modelPreferences: IModelPreferences,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height, modelPreferences);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n modelPreferences: IModelPreferences,\n): [number, number, number, number] {\n if (\n vlLocateMode(modelPreferences) === 'doubao-vision' ||\n vlLocateMode(modelPreferences) === 'vlm-ui-tars'\n ) {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlLocateMode(modelPreferences) === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n modelPreferences: IModelPreferences,\n offsetX = 0,\n offsetY = 0,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(\n bbox,\n width,\n height,\n modelPreferences,\n );\n\n // Calculate initial rect dimensions\n const rectLeft = left;\n const rectTop = top;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft + offsetX,\n top: rectTop + offsetY,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(\n size: Size,\n modelPreferences: IModelPreferences,\n) {\n if (warned) return;\n if (getModelName(modelPreferences)?.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your interface to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n modelPreferences: IModelPreferences,\n) {\n const minEdgeSize =\n vlLocateMode(modelPreferences) === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\nconst MidsceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\nexport type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationResult;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodUnion (the new MidsceneLocation structure)\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n return locateFieldFlagName in shape;\n }\n\n return false;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n"],"names":["AIActionType","callAiFn","msgs","AIActionTypeValue","modelPreferences","jsonObject","callToGetJSONObject","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","vlLocateMode","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","warned","warnGPT4oSizeLimit","size","_getModelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","verb","action","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationResult","getMidsceneLocationSchema","ifMidsceneLocatorField","field","_actualField__def","_actualField__def1","actualField","shape","dumpMidsceneLocatorField","String","findAllMidsceneLocatorField","zodType","requiredOnly","_zodObject__def","zodObject","keys","Object","key","_field__def","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam"],"mappings":";;;;;;;;AAmCO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQL,eAAeC,SACpBC,IAAY,EACZC,iBAA+B,EAC/BC,gBAAmC;IAEnC,MAAMC,aAAa,MAAMC,oBACvBJ,MACAC,mBACAC;IAGF,OAAO;QACL,SAASC,WAAW,OAAO;QAC3B,OAAOA,WAAW,KAAK;IACzB;AACF;AAEA,MAAME,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdT,gBAAmC;IAGnC,IAAKO,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGG,UAAUH,OAAO,IAAI,EAAEC,OAAOC,QAAQT;IAGtD,OAAOO;AACT;AAEO,SAASI,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;QACN,YAAnB,OAAOS,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;KAC1B;IACD,OAAOa;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCJ,KAAa,EACbC,MAAc;IAEdU,OACEX,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOG,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;YAC5CQ,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;SAC7C;QAEH,MAAM,IAAIM,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAIH,IACEc,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;QAEhEc,KAAK,GAAG,CACNT,OACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACNR,QACAQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIS,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAGH,MAAMI,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdT,gBAAmC;IAEnC,IACE2B,AAAmC,oBAAnCA,aAAa3B,qBACb2B,AAAmC,kBAAnCA,aAAa3B,mBAEb,OAAOkB,gBAAgBN,MAAMJ,OAAOC;IAGtC,IAAIkB,AAAmC,aAAnCA,aAAa3B,mBACf,OAAO4B,gBAAgBhB,MAAMJ,OAAOC;IAGtC,OAAOE,cAAcC;AACvB;AAEO,SAASgB,gBACdhB,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC5C,MAAMsB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC5C,MAAMsB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC7C,MAAMwB,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdT,gBAAmC,EACnCkC,UAAU,CAAC,EACXC,UAAU,CAAC;IAEX/B,kBAAkB,mBAAmBQ,MAAMJ,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UACjCE,MACAJ,OACAC,QACAT;IAIF,MAAMoC,WAAWP;IACjB,MAAMQ,UAAUP;IAChB,IAAIQ,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAY9B,OACzB8B,YAAY9B,QAAQ4B;IAItB,IAAIC,UAAUE,aAAa9B,QACzB8B,aAAa9B,SAAS4B;IAIxBC,YAAYrB,KAAK,GAAG,CAAC,GAAGqB;IACxBC,aAAatB,KAAK,GAAG,CAAC,GAAGsB;IAEzB,MAAMC,OAAO;QACX,MAAMJ,WAAWF;QACjB,KAAKG,UAAUF;QACf,OAAOG;QACP,QAAQC;IACV;IACAnC,kBAAkB,4BAA4BoC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBACdC,IAAU,EACV3C,gBAAmC;QAG/B4C;IADJ,IAAIH,QAAQ;IACZ,IAAI,QAAAG,CAAAA,gBAAAA,aAAa5C,iBAAgB,IAA7B4C,KAAAA,IAAAA,cAAgC,WAAW,GAAG,QAAQ,CAAC,WAAW;QACpE,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,6FAA6F,CAAC;QAErN,IACE1B,KAAK,GAAG,CAAC0B,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpC1B,KAAK,GAAG,CAAC0B,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAUhC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAASlC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAWnC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYpC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdd,IAAU,EACVe,UAAgB,EAChBvD,gBAAmC;IAEnC,MAAMwD,cACJ7B,AAAmC,oBAAnCA,aAAa3B,oBAAwC,MAAM;IAC7D,MAAMyD,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTvC,KAAK,IAAI,CAAEuC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVvC,KAAK,IAAI,CAAEuC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGvB,KAAK,GAAG,CAAC,GAAGuB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGvB,KAAK,GAAG,CACnBuB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGvB,KAAK,GAAG,CAAC,GAAGuB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGvB,KAAK,GAAG,CACpBuB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,WAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;QACxB,MAAMK,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASL,YAAY,IAAI,CAAC,CAACK,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACX/B,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE8B,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAME,UAAUD,OAAO,cAAc,IAAID;QACzC,MAAMG,YAAYF,OAAO,WAAW,GAChCG,gBAAgBL,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMI,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAL,KAAK,IAAI,CAACO;IACZ;IAEA,IAAIR,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMQ,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;IAChB,KAAKA,EAAE,MAAM,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAE5B,MAAMC,yBAAyBN,EAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,EAAE,OAAO,CAAC;IACjC,QAAQI;IAGR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,OAAO,GAAG,QAAQ;IAG3B,QAAQA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAGP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;QAGjCC,mBAKAC;IANJ,IAAIC,cAAcH;IAClB,IAAIC,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,eACjCE,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAID,AAAAA,SAAAA,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,QAAQ,AAAD,MAAM,aAAa;QAC9C,MAAME,QAAQD,YAAY,IAAI,CAAC,KAAK;QACpC,OAAOP,uBAAuBQ;IAChC;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACL;IACvCzE,OACEwE,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOM,OAAON;AAChB;AAEO,MAAMO,8BAA8B,CACzCC,SACAC;QAQIC;IANJ,IAAI,CAACF,SACH,OAAO,EAAE;IAIX,MAAMG,YAAYH;IAClB,IAAIE,AAAAA,SAAAA,CAAAA,kBAAAA,UAAU,IAAI,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,QAAQ,AAAD,MAAM,eAAeC,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOC,OAAO,IAAI,CAACF,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACE;YAClB,MAAMd,QAAQW,UAAU,KAAK,CAACG,IAAI;YAClC,IAAI,CAACf,uBAAuBC,QAC1B,OAAO;YAIT,IAAIS,cAAc;oBACTM;gBAAP,OAAOA,AAAAA,SAAAA,CAAAA,cAAAA,MAAM,IAAI,AAAD,IAATA,KAAAA,IAAAA,YAAY,QAAQ,AAAD,MAAM;YAClC;YAEA,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAM3B,kBAAkB,CAC7B/E,YACA2G;IAEA,MAAMC,gBAAgBV,4BAA4BS;IAClD,MAAM5F,SAAS;QAAE,GAAGf,UAAU;IAAC;IAE/B,KAAK,MAAM6G,aAAaD,cAAe;QACrC,MAAME,aAAa/F,MAAM,CAAC8F,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACT/F,MAAM,CAAC8F,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1B/F,MAAM,CAAC8F,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxB/F,MAAM,CAAC8F,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAO/F;AACT;AAEO,MAAMgG,kBAAkB,CAC7B/G,YACA2G;IAEA,MAAMC,gBAAgBV,4BAA4BS;IAClD,MAAM5F,SAAS;QAAE,GAAGf,UAAU;IAAC;IAE/B,KAAK,MAAM6G,aAAaD,cAAe;QACrC,MAAME,aAAa/F,MAAM,CAAC8F,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvB/F,MAAM,CAAC8F,UAAU,GAAG;YAClB,CAACtB,oBAAoB,EAAE;YACvB,QAAQuB;QACV;IAEJ;IAEA,OAAO/F;AACT"}
1
+ {"version":3,"file":"ai-model/common.mjs","sources":["webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n vlMode: TVlModeTypes | undefined,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height, vlMode);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n vlMode: TVlModeTypes | undefined,\n): [number, number, number, number] {\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlMode === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n vlMode?: TVlModeTypes | undefined,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(bbox, width, height, vlMode);\n\n // Calculate initial rect dimensions\n const rectLeft = left + offsetX;\n const rectTop = top + offsetY;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft,\n top: rectTop,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size, modelName: string) {\n if (warned) return;\n if (modelName.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your interface to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n vlMode: TVlModeTypes | undefined,\n) {\n const minEdgeSize = vlMode === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\nconst MidsceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\nexport type MidsceneLocationResultType = z.infer<typeof MidsceneLocationResult>;\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationResult;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodUnion (the new MidsceneLocation structure)\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n return locateFieldFlagName in shape;\n }\n\n return false;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n"],"names":["AIActionType","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","vlMode","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","warned","warnGPT4oSizeLimit","size","modelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","verb","action","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationResult","getMidsceneLocationSchema","ifMidsceneLocatorField","field","_actualField__def","_actualField__def1","actualField","shape","dumpMidsceneLocatorField","String","findAllMidsceneLocatorField","zodType","requiredOnly","_zodObject__def","zodObject","keys","Object","key","_field__def","jsonObject","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam"],"mappings":";;;;;;AA8BO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQZ,MAAMC,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdC,MAAgC;IAGhC,IAAKH,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGI,UAAUJ,OAAO,IAAI,EAAEC,OAAOC,QAAQC;IAGtD,OAAOH;AACT;AAEO,SAASK,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGV;QACN,YAAnB,OAAOU,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGV;KAC1B;IACD,OAAOc;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCL,KAAa,EACbC,MAAc;IAEdW,OACEZ,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOI,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIb,QAAS;YAC3CU,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,SAAU;YAC5CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIb,QAAS;YAC3CU,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,SAAU;SAC7C;QAEH,MAAM,IAAIO,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;QACpCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;KACrC;IAIH,IACEe,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS,QAAQL,kBAAkB;QAE/De,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU,QAAQN,kBAAkB;QAEhEe,KAAK,GAAG,CACNV,OACAU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS,QAAQL,kBAAkB;QAE/De,KAAK,GAAG,CACNT,QACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIU,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;QACpCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGhB,QAAS;QACnCU,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,SAAU;KACrC;IAGH,MAAMK,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdL,KAAa,EACbC,MAAc,EACdC,MAAgC;IAEhC,IAAIA,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAChC,OAAOS,gBAAgBN,MAAML,OAAOC;IAGtC,IAAIC,AAAW,aAAXA,QACF,OAAOkB,gBAAgBf,MAAML,OAAOC;IAGtC,OAAOG,cAAcC;AACvB;AAEO,SAASe,gBACdf,IAAc,EACdL,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOX,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGL,QAAS;IAC5C,MAAMsB,MAAMZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,SAAU;IAC5C,MAAMsB,QAAQb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGL,QAAS;IAC7C,MAAMwB,SAASd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdpB,IAAc,EACdL,KAAa,EACbC,MAAc,EACdyB,UAAU,CAAC,EACXC,UAAU,CAAC,EACXzB,MAAiC;IAEjCN,kBAAkB,mBAAmBS,MAAML,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGrB,UAAUE,MAAML,OAAOC,QAAQC;IAGlE,MAAM0B,WAAWP,OAAOK;IACxB,MAAMG,UAAUP,MAAMK;IACtB,IAAIG,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAY9B,OACzB8B,YAAY9B,QAAQ4B;IAItB,IAAIC,UAAUE,aAAa9B,QACzB8B,aAAa9B,SAAS4B;IAIxBC,YAAYpB,KAAK,GAAG,CAAC,GAAGoB;IACxBC,aAAarB,KAAK,GAAG,CAAC,GAAGqB;IAEzB,MAAMC,OAAO;QACX,MAAMJ;QACN,KAAKC;QACL,OAAOC;QACP,QAAQC;IACV;IACAnC,kBAAkB,4BAA4BoC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU,EAAEC,SAAiB;IAC9D,IAAIH,QAAQ;IACZ,IAAIG,UAAU,WAAW,GAAG,QAAQ,CAAC,WAAW;QAC9C,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,6FAA6F,CAAC;QAErN,IACEzB,KAAK,GAAG,CAACyB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCzB,KAAK,GAAG,CAACyB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU/B,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAASjC,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAWlC,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYnC,KAAK,GAAG,IAAI8B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdd,IAAU,EACVe,UAAgB,EAChB7C,MAAgC;IAEhC,MAAM8C,cAAc9C,AAAW,oBAAXA,SAA6B,MAAM;IACvD,MAAM+C,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTtC,KAAK,IAAI,CAAEsC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVtC,KAAK,IAAI,CAAEsC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGtB,KAAK,GAAG,CAAC,GAAGsB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGtB,KAAK,GAAG,CACnBsB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGtB,KAAK,GAAG,CAAC,GAAGsB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGtB,KAAK,GAAG,CACpBsB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,WAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;QACxB,MAAMK,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASL,YAAY,IAAI,CAAC,CAACK,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACX/B,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE8B,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAME,UAAUD,OAAO,cAAc,IAAID;QACzC,MAAMG,YAAYF,OAAO,WAAW,GAChCG,gBAAgBL,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMI,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAL,KAAK,IAAI,CAACO;IACZ;IAEA,IAAIR,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMQ,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;IAChB,KAAKA,EAAE,MAAM,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAE5B,MAAMC,yBAAyBN,EAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,EAAE,OAAO,CAAC;IACjC,QAAQI;IAGR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,OAAO,GAAG,QAAQ;IAG3B,QAAQA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAGP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;QAGjCC,mBAKAC;IANJ,IAAIC,cAAcH;IAClB,IAAIC,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,eACjCE,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAID,AAAAA,SAAAA,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,QAAQ,AAAD,MAAM,aAAa;QAC9C,MAAME,QAAQD,YAAY,IAAI,CAAC,KAAK;QACpC,OAAOP,uBAAuBQ;IAChC;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACL;IACvCxE,OACEuE,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOM,OAAON;AAChB;AAEO,MAAMO,8BAA8B,CACzCC,SACAC;QAQIC;IANJ,IAAI,CAACF,SACH,OAAO,EAAE;IAIX,MAAMG,YAAYH;IAClB,IAAIE,AAAAA,SAAAA,CAAAA,kBAAAA,UAAU,IAAI,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,QAAQ,AAAD,MAAM,eAAeC,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOC,OAAO,IAAI,CAACF,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACE;YAClB,MAAMd,QAAQW,UAAU,KAAK,CAACG,IAAI;YAClC,IAAI,CAACf,uBAAuBC,QAC1B,OAAO;YAIT,IAAIS,cAAc;oBACTM;gBAAP,OAAOA,AAAAA,SAAAA,CAAAA,cAAAA,MAAM,IAAI,AAAD,IAATA,KAAAA,IAAAA,YAAY,QAAQ,AAAD,MAAM;YAClC;YAEA,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAM3B,kBAAkB,CAC7B4B,YACAC;IAEA,MAAMC,gBAAgBX,4BAA4BU;IAClD,MAAM5F,SAAS;QAAE,GAAG2F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa/F,MAAM,CAAC8F,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACT/F,MAAM,CAAC8F,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1B/F,MAAM,CAAC8F,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxB/F,MAAM,CAAC8F,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAO/F;AACT;AAEO,MAAMgG,kBAAkB,CAC7BL,YACAC;IAEA,MAAMC,gBAAgBX,4BAA4BU;IAClD,MAAM5F,SAAS;QAAE,GAAG2F,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAa/F,MAAM,CAAC8F,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvB/F,MAAM,CAAC8F,UAAU,GAAG;YAClB,CAACvB,oBAAoB,EAAE;YACvB,QAAQwB;QACV;IAEJ;IAEA,OAAO/F;AACT"}
@@ -1,10 +1,10 @@
1
- import { call, callAiFnWithStringResponse, callToGetJSONObject } from "./service-caller/index.mjs";
1
+ import { callAI, callAIWithObjectResponse, callAIWithStringResponse } from "./service-caller/index.mjs";
2
2
  import { systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
3
3
  import { describeUserPage, elementByPositionWithElementInfo } from "./prompt/util.mjs";
4
4
  import { generatePlaywrightTest, generatePlaywrightTestStream } from "./prompt/playwright-generator.mjs";
5
5
  import { generateYamlTest, generateYamlTestStream } from "./prompt/yaml-generator.mjs";
6
6
  import { AiExtractElementInfo, AiLocateElement, AiLocateSection } from "./inspect.mjs";
7
7
  import { plan } from "./llm-planning.mjs";
8
- import { AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, callAiFn, dumpActionParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, loadActionParam } from "./common.mjs";
8
+ import { AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, dumpActionParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, loadActionParam } from "./common.mjs";
9
9
  import { resizeImageForUiTars, vlmPlanning } from "./ui-tars-planning.mjs";
10
- export { AIActionType, AiExtractElementInfo, AiLocateElement, AiLocateSection, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, call as callAi, callAiFn, callAiFnWithStringResponse, callToGetJSONObject, describeUserPage, dumpActionParam, elementByPositionWithElementInfo, findAllMidsceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getMidsceneLocationSchema, loadActionParam, plan, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
10
+ export { AIActionType, AiExtractElementInfo, AiLocateElement, AiLocateSection, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, callAI, callAIWithObjectResponse, callAIWithStringResponse, describeUserPage, dumpActionParam, elementByPositionWithElementInfo, findAllMidsceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getMidsceneLocationSchema, loadActionParam, plan, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
@@ -1,13 +1,12 @@
1
- import { getIsUseQwenVl, vlLocateMode } from "@midscene/shared/env";
2
1
  import { cropByRect, paddingToMatchBlockByBase64, preProcessImageUrl } from "@midscene/shared/img";
3
2
  import { getDebug } from "@midscene/shared/logger";
4
3
  import { assert } from "@midscene/shared/utils";
5
- import { AIActionType, adaptBboxToRect, callAiFn, expandSearchArea, markupImageForLLM, mergeRects } from "./common.mjs";
4
+ import { AIActionType, adaptBboxToRect, expandSearchArea, markupImageForLLM, mergeRects } from "./common.mjs";
6
5
  import { extractDataQueryPrompt, systemPromptToExtract } from "./prompt/extraction.mjs";
7
6
  import { findElementPrompt, systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
8
7
  import { sectionLocatorInstruction, systemPromptToLocateSection } from "./prompt/llm-section-locator.mjs";
9
8
  import { describeUserPage, distance, distanceThreshold, elementByPositionWithElementInfo } from "./prompt/util.mjs";
10
- import { callToGetJSONObject } from "./service-caller/index.mjs";
9
+ import { callAIWithObjectResponse } from "./service-caller/index.mjs";
11
10
  const debugInspect = getDebug('ai:inspect');
12
11
  const debugSection = getDebug('ai:section');
13
12
  const extraTextFromUserPrompt = (prompt)=>{
@@ -55,25 +54,25 @@ const promptsToChatParam = async (multimodalPrompt)=>{
55
54
  return msgs;
56
55
  };
57
56
  async function AiLocateElement(options) {
58
- const { context, targetElementDescription, callAI } = options;
57
+ const { context, targetElementDescription, callAIFn, modelConfig } = options;
58
+ const { vlMode } = modelConfig;
59
59
  const { screenshotBase64 } = context;
60
- const modelPreferences = {
61
- intent: 'grounding'
62
- };
63
- const { description, elementById, insertElementByPosition } = await describeUserPage(context, modelPreferences);
60
+ const { description, elementById, insertElementByPosition } = await describeUserPage(context, {
61
+ vlMode
62
+ });
64
63
  assert(targetElementDescription, "cannot find the target element description");
65
64
  const userInstructionPrompt = await findElementPrompt.format({
66
65
  pageDescription: description,
67
66
  targetElementDescription: extraTextFromUserPrompt(targetElementDescription)
68
67
  });
69
- const systemPrompt = systemPromptToLocateElement(vlLocateMode(modelPreferences));
68
+ const systemPrompt = systemPromptToLocateElement(vlMode);
70
69
  let imagePayload = screenshotBase64;
71
70
  if (options.searchConfig) {
72
71
  assert(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
73
72
  assert(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
74
73
  imagePayload = options.searchConfig.imageBase64;
75
- } else if ('qwen-vl' === vlLocateMode(modelPreferences)) imagePayload = await paddingToMatchBlockByBase64(imagePayload);
76
- else if (!vlLocateMode(modelPreferences)) imagePayload = await markupImageForLLM(screenshotBase64, context.tree, context.size);
74
+ } else if ('qwen-vl' === vlMode) imagePayload = await paddingToMatchBlockByBase64(imagePayload);
75
+ else if (!vlMode) imagePayload = await markupImageForLLM(screenshotBase64, context.tree, context.size);
77
76
  const msgs = [
78
77
  {
79
78
  role: 'system',
@@ -103,10 +102,7 @@ async function AiLocateElement(options) {
103
102
  });
104
103
  msgs.push(...addOns);
105
104
  }
106
- const callAIFn = callAI || callToGetJSONObject;
107
- const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, {
108
- intent: 'grounding'
109
- });
105
+ const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);
110
106
  const rawResponse = JSON.stringify(res.content);
111
107
  let resRect;
112
108
  let matchedElements = 'elements' in res.content ? res.content.elements : [];
@@ -114,7 +110,7 @@ async function AiLocateElement(options) {
114
110
  try {
115
111
  if ('bbox' in res.content && Array.isArray(res.content.bbox)) {
116
112
  var _options_searchConfig_rect, _options_searchConfig, _options_searchConfig_rect1, _options_searchConfig1, _options_searchConfig_rect2, _options_searchConfig2, _options_searchConfig_rect3, _options_searchConfig3;
117
- resRect = adaptBboxToRect(res.content.bbox, (null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect.width) || context.size.width, (null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect1 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect1.height) || context.size.height, modelPreferences, null == (_options_searchConfig2 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig2.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig3 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig3.rect) ? void 0 : _options_searchConfig_rect3.top);
113
+ resRect = adaptBboxToRect(res.content.bbox, (null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect.width) || context.size.width, (null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect1 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect1.height) || context.size.height, null == (_options_searchConfig2 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig2.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig3 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig3.rect) ? void 0 : _options_searchConfig_rect3.top, vlMode);
118
114
  debugInspect('resRect', resRect);
119
115
  const rectCenter = {
120
116
  x: resRect.left + resRect.width / 2,
@@ -153,12 +149,10 @@ async function AiLocateElement(options) {
153
149
  };
154
150
  }
155
151
  async function AiLocateSection(options) {
156
- const { context, sectionDescription } = options;
152
+ const { context, sectionDescription, modelConfig } = options;
153
+ const { vlMode } = modelConfig;
157
154
  const { screenshotBase64 } = context;
158
- const modelPreferences = {
159
- intent: 'grounding'
160
- };
161
- const systemPrompt = systemPromptToLocateSection(vlLocateMode(modelPreferences));
155
+ const systemPrompt = systemPromptToLocateSection(vlMode);
162
156
  const sectionLocatorInstructionText = await sectionLocatorInstruction.format({
163
157
  sectionDescription: extraTextFromUserPrompt(sectionDescription)
164
158
  });
@@ -191,30 +185,26 @@ async function AiLocateSection(options) {
191
185
  });
192
186
  msgs.push(...addOns);
193
187
  }
194
- const result = await callAiFn(msgs, AIActionType.EXTRACT_DATA, {
195
- intent: 'grounding'
196
- });
188
+ const result = await callAIWithObjectResponse(msgs, AIActionType.EXTRACT_DATA, modelConfig);
197
189
  let sectionRect;
198
190
  const sectionBbox = result.content.bbox;
199
191
  if (sectionBbox) {
200
- const targetRect = adaptBboxToRect(sectionBbox, context.size.width, context.size.height, modelPreferences);
192
+ const targetRect = adaptBboxToRect(sectionBbox, context.size.width, context.size.height, 0, 0, vlMode);
201
193
  debugSection('original targetRect %j', targetRect);
202
194
  const referenceBboxList = result.content.references_bbox || [];
203
195
  debugSection('referenceBboxList %j', referenceBboxList);
204
- const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>adaptBboxToRect(bbox, context.size.width, context.size.height, modelPreferences));
196
+ const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>adaptBboxToRect(bbox, context.size.width, context.size.height, 0, 0, vlMode));
205
197
  debugSection('referenceRects %j', referenceRects);
206
198
  const mergedRect = mergeRects([
207
199
  targetRect,
208
200
  ...referenceRects
209
201
  ]);
210
202
  debugSection('mergedRect %j', mergedRect);
211
- sectionRect = expandSearchArea(mergedRect, context.size, modelPreferences);
203
+ sectionRect = expandSearchArea(mergedRect, context.size, vlMode);
212
204
  debugSection('expanded sectionRect %j', sectionRect);
213
205
  }
214
206
  let imageBase64 = screenshotBase64;
215
- if (sectionRect) imageBase64 = await cropByRect(screenshotBase64, sectionRect, getIsUseQwenVl({
216
- intent: 'grounding'
217
- }));
207
+ if (sectionRect) imageBase64 = await cropByRect(screenshotBase64, sectionRect, 'qwen-vl' === vlMode);
218
208
  return {
219
209
  rect: sectionRect,
220
210
  imageBase64,
@@ -225,14 +215,16 @@ async function AiLocateSection(options) {
225
215
  }
226
216
  async function AiExtractElementInfo(options) {
227
217
  var _options_extractOption;
228
- const { dataQuery, context, extractOption, multimodalPrompt, modelPreferences } = options;
218
+ const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } = options;
219
+ const { vlMode } = modelConfig;
229
220
  const systemPrompt = systemPromptToExtract();
230
221
  const { screenshotBase64 } = context;
231
- const { description, elementById } = await describeUserPage(context, modelPreferences, {
222
+ const { description, elementById } = await describeUserPage(context, {
232
223
  truncateTextLength: 200,
233
224
  filterNonTextContent: false,
234
225
  visibleOnly: false,
235
- domIncluded: null == extractOption ? void 0 : extractOption.domIncluded
226
+ domIncluded: null == extractOption ? void 0 : extractOption.domIncluded,
227
+ vlMode
236
228
  });
237
229
  const extractDataPromptText = await extractDataQueryPrompt(description, dataQuery);
238
230
  const userContent = [];
@@ -268,7 +260,7 @@ async function AiExtractElementInfo(options) {
268
260
  });
269
261
  msgs.push(...addOns);
270
262
  }
271
- const result = await callAiFn(msgs, AIActionType.EXTRACT_DATA, modelPreferences);
263
+ const result = await callAIWithObjectResponse(msgs, AIActionType.EXTRACT_DATA, modelConfig);
272
264
  return {
273
265
  parseResult: result.content,
274
266
  elementById,
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.mjs","sources":["webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n UIContext,\n} from '@/types';\nimport {\n type IModelPreferences,\n getIsUseQwenVl,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n callAiFn,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callToGetJSONObject } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAI?: typeof callAiFn<AIElementResponse | [number, number]>;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAI } = options;\n const { screenshotBase64 } = context;\n\n const modelPreferences: IModelPreferences = {\n intent: 'grounding',\n };\n\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context, modelPreferences);\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(\n vlLocateMode(modelPreferences),\n );\n\n let imagePayload = screenshotBase64;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n } else if (vlLocateMode(modelPreferences) === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode(modelPreferences)) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const callAIFn =\n callAI || callToGetJSONObject<AIElementResponse | [number, number]>;\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, {\n intent: 'grounding',\n });\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n options.searchConfig?.rect?.width || context.size.width,\n options.searchConfig?.rect?.height || context.size.height,\n modelPreferences,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n );\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n callAI?: typeof callAiFn<AISectionLocatorResponse>;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const { screenshotBase64 } = context;\n\n const modelPreferences: IModelPreferences = {\n intent: 'grounding',\n };\n\n const systemPrompt = systemPromptToLocateSection(\n vlLocateMode(modelPreferences),\n );\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n {\n intent: 'grounding',\n },\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n modelPreferences,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n modelPreferences,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, modelPreferences);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n imageBase64 = await cropByRect(\n screenshotBase64,\n sectionRect,\n getIsUseQwenVl({\n intent: 'grounding',\n }),\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n modelPreferences: IModelPreferences;\n}) {\n const {\n dataQuery,\n context,\n extractOption,\n multimodalPrompt,\n modelPreferences,\n } = options;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n\n const { description, elementById } = await describeUserPage(\n context,\n modelPreferences,\n {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n },\n );\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelPreferences,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAI","screenshotBase64","modelPreferences","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","vlLocateMode","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","addOns","callAIFn","callToGetJSONObject","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect","_options_searchConfig_rect1","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAiFn","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","cropByRect","getIsUseQwenVl","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;;;;;;;;AA+DA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OAMD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,MAAM,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAE7B,MAAMI,mBAAsC;QAC1C,QAAQ;IACV;IAEA,MAAM,EAAEC,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,iBAAiBR,SAASI;IAElCK,OACER,0BACA;IAGF,MAAMS,wBAAwB,MAAMC,kBAAkB,MAAM,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0BhB,wBAAwBY;IACpD;IACA,MAAMW,eAAeC,4BACnBC,aAAaV;IAGf,IAAIW,eAAeZ;IAEnB,IAAIJ,QAAQ,YAAY,EAAE;QACxBU,OACEV,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFU,OACEV,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFgB,eAAehB,QAAQ,YAAY,CAAC,WAAW;IACjD,OAAO,IAAIe,AAAmC,cAAnCA,aAAaV,mBACtBW,eAAe,MAAMC,4BAA4BD;SAC5C,IAAI,CAACD,aAAaV,mBACvBW,eAAe,MAAME,kBACnBd,kBACAH,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASkB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAML;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOT,0BAAuC;QAChD,MAAMiB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAMC,WACJjB,UAAUkB;IAEZ,MAAMC,MAAM,MAAMF,SAASzB,MAAM4B,aAAa,eAAe,EAAE;QAC7D,QAAQ;IACV;IAEA,MAAMC,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAG1DQ,4BAAAA,uBACAC,6BAAAA,wBAEAC,6BAAAA,wBACAC,6BAAAA;YANFP,UAAUQ,gBACRZ,IAAI,OAAO,CAAC,IAAI,EAChBQ,AAAAA,SAAAA,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,6BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,2BAA4B,KAAK,AAAD,KAAK7B,QAAQ,IAAI,CAAC,KAAK,EACvD8B,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,MAAM,AAAD,KAAK9B,QAAQ,IAAI,CAAC,MAAM,EACzDI,kBAAAA,QACA2B,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG;YAEjC9C,aAAa,WAAWuC;YAExB,MAAMS,aAAa;gBACjB,GAAGT,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIU,UAAUC,iCAAiCpC,QAAQ,IAAI,EAAEkC;YAE7D,MAAMG,mBAAmBF,UACrBG,SAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,mBACjCJ,UAAU5B,wBAAwB2B;YAGpC,IAAIC,SAAS;gBACXT,kBAAkB;oBAACS;iBAAQ;gBAC3BR,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOa,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACb,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEc,IAAI,CAAC,CAAC;aAFtBd,SAAS;YAACc;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMhB;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACAjB;QACA,OAAOe,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCsB;IACR;AACF;AAEO,eAAeC,gBAAgB7C,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAE6C,kBAAkB,EAAE,GAAG9C;IACxC,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAE7B,MAAMI,mBAAsC;QAC1C,QAAQ;IACV;IAEA,MAAMQ,eAAekC,4BACnBhC,aAAaV;IAEf,MAAM2C,gCAAgC,MAAMC,0BAA0B,MAAM,CAAC;QAC3E,oBAAoB3D,wBAAwBwD;IAC9C;IACA,MAAMnD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASkB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM4C;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM3B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQsD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAnD,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM+B,SAAS,MAAMC,SACnBxD,MACA4B,aAAa,YAAY,EACzB;QACE,QAAQ;IACV;IAGF,IAAI6B;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,gBACjBmB,aACApD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0BiE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D7D,aAAa,wBAAwBkE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS5B,MAAM,OAAO,CAAC4B,OAC/B,GAAG,CAAC,CAACA,OACGvB,gBACLuB,MACAxD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNhB,aAAa,qBAAqBmE;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DnE,aAAa,iBAAiBqE;QAG9BN,cAAcQ,iBAAiBF,YAAYzD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2B+D;IAC1C;IAEA,IAAIS,cAAczD;IAClB,IAAIgD,aACFS,cAAc,MAAMC,WAClB1D,kBACAgD,aACAW,eAAe;QACb,QAAQ;IACV;IAIJ,OAAO;QACL,MAAMX;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAazB,KAAK,SAAS,CAACyB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAGpBhE,OAMD;QAqDKiE;IApDJ,MAAM,EACJC,SAAS,EACTjE,OAAO,EACPkE,aAAa,EACb1E,gBAAgB,EAChBY,gBAAgB,EACjB,GAAGL;IACJ,MAAMa,eAAeuD;IAErB,MAAM,EAAEhE,gBAAgB,EAAE,GAAGH;IAE7B,MAAM,EAAEK,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,iBACzCR,SACAI,kBACA;QACE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAa8D,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;IACzC;IAGF,MAAME,wBAAwB,MAAMC,uBAClChE,aACA4D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKnE;YACL,QAAQ;QACV;IACF;IAGFmE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM1E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASkB;QAAa;QACxC;YACE,MAAM;YACN,SAAS0D;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCtE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM+B,SAAS,MAAMC,SACnBxD,MACA4B,aAAa,YAAY,EACzBlB;IAEF,OAAO;QACL,aAAa6C,OAAO,OAAO;QAC3B3C;QACA,OAAO2C,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.mjs","sources":["webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context, { vlMode });\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n } else if (vlMode === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n options.searchConfig?.rect?.width || context.size.width,\n options.searchConfig?.rect?.height || context.size.height,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n vlMode,\n );\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n imageBase64 = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const { vlMode } = modelConfig;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n\n const { description, elementById } = await describeUserPage(context, {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n vlMode,\n });\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect","_options_searchConfig_rect1","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","cropByRect","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;;;;;;;AA0DA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OASD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,iBAAiBT,SAAS;QAAEI;IAAO;IAE3CM,OACET,0BACA;IAEF,MAAMU,wBAAwB,MAAMC,kBAAkB,MAAM,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0BjB,wBAAwBY;IACpD;IACA,MAAMY,eAAeC,4BAA4BV;IAEjD,IAAIW,eAAeV;IAEnB,IAAIN,QAAQ,YAAY,EAAE;QACxBW,OACEX,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFW,OACEX,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFgB,eAAehB,QAAQ,YAAY,CAAC,WAAW;IACjD,OAAO,IAAIK,AAAW,cAAXA,QACTW,eAAe,MAAMC,4BAA4BD;SAC5C,IAAI,CAACX,QACVW,eAAe,MAAME,kBACnBZ,kBACAL,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOV,0BAAuC;QAChD,MAAMiB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAMC,MAAM,MAAMjB,SAASR,MAAM0B,aAAa,eAAe,EAAEjB;IAE/D,MAAMkB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAG1DQ,4BAAAA,uBACAC,6BAAAA,wBACAC,6BAAAA,wBACAC,6BAAAA;YALFP,UAAUQ,gBACRZ,IAAI,OAAO,CAAC,IAAI,EAChBQ,AAAAA,SAAAA,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,6BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,2BAA4B,KAAK,AAAD,KAAK3B,QAAQ,IAAI,CAAC,KAAK,EACvD4B,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,MAAM,AAAD,KAAK5B,QAAQ,IAAI,CAAC,MAAM,UACzD6B,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/B1B;YAEFlB,aAAa,WAAWqC;YAExB,MAAMS,aAAa;gBACjB,GAAGT,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIU,UAAUC,iCAAiClC,QAAQ,IAAI,EAAEgC;YAE7D,MAAMG,mBAAmBF,UACrBG,SAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,mBACjCJ,UAAUzB,wBAAwBwB;YAGpC,IAAIC,SAAS;gBACXT,kBAAkB;oBAACS;iBAAQ;gBAC3BR,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOa,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACb,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEc,IAAI,CAAC,CAAC;aAFtBd,SAAS;YAACc;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMhB;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACAd;QACA,OAAOY,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCsB;IACR;AACF;AAEO,eAAeC,gBAAgB3C,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAE2C,kBAAkB,EAAExC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMa,eAAe+B,4BAA4BxC;IACjD,MAAMyC,gCAAgC,MAAMC,0BAA0B,MAAM,CAAC;QAC3E,oBAAoBzD,wBAAwBsD;IAC9C;IACA,MAAMjD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMwC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMzB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQoD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAjD,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM6B,SAAS,MAAMC,yBACnBtD,MACA0B,aAAa,YAAY,EACzBjB;IAGF,IAAI8C;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,gBACjBmB,aACAlD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAI;QAEFhB,aAAa,0BAA0B+D;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D3D,aAAa,wBAAwBgE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS5B,MAAM,OAAO,CAAC4B,OAC/B,GAAG,CAAC,CAACA,OACGvB,gBACLuB,MACAtD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAI;QAGNhB,aAAa,qBAAqBiE;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DjE,aAAa,iBAAiBmE;QAG9BN,cAAcQ,iBAAiBF,YAAYvD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2B6D;IAC1C;IAEA,IAAIS,cAAcrD;IAClB,IAAI4C,aACFS,cAAc,MAAMC,WAClBtD,kBACA4C,aACA7C,AAAW,cAAXA;IAIJ,OAAO;QACL,MAAM6C;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAazB,KAAK,SAAS,CAACyB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAea,qBAGpB7D,OAMD;QA8CK8D;IA7CJ,MAAM,EAAEC,SAAS,EAAE9D,OAAO,EAAE+D,aAAa,EAAEvE,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAMU,eAAemD;IAErB,MAAM,EAAE3D,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,iBAAiBT,SAAS;QACnE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAa+D,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;QACvC3D;IACF;IAEA,MAAM6D,wBAAwB,MAAMC,uBAClC5D,aACAwD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK9D;YACL,QAAQ;QACV;IACF;IAGF8D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMvE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCnE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM6B,SAAS,MAAMC,yBACnBtD,MACA0B,aAAa,YAAY,EACzBjB;IAEF,OAAO;QACL,aAAa4C,OAAO,OAAO;QAC3BxC;QACA,OAAOwC,OAAO,KAAK;IACrB;AACF"}