@midscene/core 1.9.1 → 1.9.2-beta-20260605084246.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/es/agent/agent.mjs +2 -1
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-cache.mjs +43 -8
  4. package/dist/es/agent/task-cache.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +18 -3
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/ui-utils.mjs +2 -1
  8. package/dist/es/agent/ui-utils.mjs.map +1 -1
  9. package/dist/es/agent/utils.mjs +1 -1
  10. package/dist/es/ai-model/inspect.mjs +8 -60
  11. package/dist/es/ai-model/inspect.mjs.map +1 -1
  12. package/dist/es/ai-model/llm-planning.mjs +6 -3
  13. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  14. package/dist/es/ai-model/models/auto-glm/planning.mjs +8 -3
  15. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -1
  16. package/dist/es/ai-model/models/ui-tars/planning.mjs +6 -2
  17. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -1
  18. package/dist/es/common.mjs +50 -3
  19. package/dist/es/common.mjs.map +1 -1
  20. package/dist/es/types.mjs.map +1 -1
  21. package/dist/es/utils.mjs +2 -2
  22. package/dist/es/yaml/player.mjs +9 -4
  23. package/dist/es/yaml/player.mjs.map +1 -1
  24. package/dist/lib/agent/agent.js +2 -1
  25. package/dist/lib/agent/agent.js.map +1 -1
  26. package/dist/lib/agent/task-cache.js +43 -8
  27. package/dist/lib/agent/task-cache.js.map +1 -1
  28. package/dist/lib/agent/tasks.js +17 -2
  29. package/dist/lib/agent/tasks.js.map +1 -1
  30. package/dist/lib/agent/ui-utils.js +3 -2
  31. package/dist/lib/agent/ui-utils.js.map +1 -1
  32. package/dist/lib/agent/utils.js +1 -1
  33. package/dist/lib/ai-model/inspect.js +7 -59
  34. package/dist/lib/ai-model/inspect.js.map +1 -1
  35. package/dist/lib/ai-model/llm-planning.js +6 -3
  36. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  37. package/dist/lib/ai-model/models/auto-glm/planning.js +8 -3
  38. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -1
  39. package/dist/lib/ai-model/models/ui-tars/planning.js +6 -2
  40. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -1
  41. package/dist/lib/common.js +59 -3
  42. package/dist/lib/common.js.map +1 -1
  43. package/dist/lib/types.js.map +1 -1
  44. package/dist/lib/utils.js +2 -2
  45. package/dist/lib/yaml/player.js +9 -4
  46. package/dist/lib/yaml/player.js.map +1 -1
  47. package/dist/types/agent/agent.d.ts +2 -2
  48. package/dist/types/agent/task-cache.d.ts +18 -2
  49. package/dist/types/agent/tasks.d.ts +15 -2
  50. package/dist/types/ai-model/inspect.d.ts +1 -2
  51. package/dist/types/ai-model/llm-planning.d.ts +2 -1
  52. package/dist/types/ai-model/models/auto-glm/planning.d.ts +2 -1
  53. package/dist/types/ai-model/models/ui-tars/planning.d.ts +2 -1
  54. package/dist/types/ai-model/workflows/planning/types.d.ts +4 -1
  55. package/dist/types/common.d.ts +4 -0
  56. package/dist/types/types.d.ts +1 -1
  57. package/dist/types/yaml.d.ts +4 -3
  58. package/package.json +2 -2
@@ -1,6 +1,6 @@
1
+ import { buildYamlFlowFromPlans, findAllMidsceneLocatorField, userPromptToString } from "../common.mjs";
1
2
  import { getDebug } from "@midscene/shared/logger";
2
3
  import { assert } from "@midscene/shared/utils";
3
- import { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from "../common.mjs";
4
4
  import { planningModelFamilyRequiredForLocateMessage } from "./errors.mjs";
5
5
  import { systemPromptToTaskPlanning } from "./prompt/llm-planning.mjs";
6
6
  import { extractXMLTag, parseMarkFinishedIndexes, parseSubGoalsFromXML } from "./prompt/util.mjs";
@@ -100,17 +100,20 @@ async function plan(userInstruction, opts) {
100
100
  policy: adapter.imagePreprocess
101
101
  });
102
102
  const imagePayload = preparedImage.imageBase64;
103
+ const userInstructionText = userPromptToString(userInstruction);
103
104
  const actionContext = opts.actionContext ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\n` : '';
105
+ const referenceImageMessages = opts.referenceImageMessages ?? [];
104
106
  const instruction = [
105
107
  {
106
108
  role: 'user',
107
109
  content: [
108
110
  {
109
111
  type: 'text',
110
- text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`
112
+ text: `${actionContext}<user_instruction>${userInstructionText}</user_instruction>`
111
113
  }
112
114
  ]
113
- }
115
+ },
116
+ ...referenceImageMessages
114
117
  ];
115
118
  let latestFeedbackMessage;
116
119
  const executionProgressText = includeSubGoals ? conversationHistory.subGoalsToText() : conversationHistory.historicalLogsToText();
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;;AAoBA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,cAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,cAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,qBAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,yBAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAAuB,EACvBC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,4CAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,2BAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,kBAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,gBAAgBf,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMgB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEhB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIkB;IAKJ,MAAMC,wBAAwBT,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMiB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BhB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMiD,eAAelB,oBAAoB,cAAc;IACvD,MAAMmB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIlB,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEmB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKL;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACe;IAG3Bf,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMoB,aAAapB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAMuB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCM;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMpB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI4B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAMyB,QAAQ,MAAMF,OAAOJ,MAAMpB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACAwB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE;QAEA,IAAIwB,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAItB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM8B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,uBAAuBH,SAAS9B,KAAK,WAAW;YAC1D+B;QACF;QAEAG,OAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAACvC,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAOuC,cAET9C,OAAO,KAAK,CAAC6C,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,OACE1B,qBACA;oBAEFf,OAAO,KAAK,CAAC6C,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkB/B,oBAAoB,6BAA6B,CACjE+B,cACA;4BACE,cAAc3B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAImB,WAAW,cAAc,EAAE,QAC7B1B,oBAAoB,aAAa,CAAC0B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD1B,oBAAoB,mBAAmB,CAACsC;YAI5C,IAAIZ,WAAW,GAAG,EAChB1B,oBAAoB,gBAAgB,CAAC0B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB1B,oBAAoB,mBAAmB,CAAC0B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB1B,oBAAoB,YAAY,CAAC0B,WAAW,MAAM;QAGpD1B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMsB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
1
+ {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import { type TUserPrompt, userPromptToString } from '@/common';\nimport type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: TUserPrompt,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const userInstructionText = userPromptToString(userInstruction);\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const referenceImageMessages = opts.referenceImageMessages ?? [];\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstructionText}</user_instruction>`,\n },\n ],\n },\n ...referenceImageMessages,\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","userInstructionText","userPromptToString","actionContext","referenceImageMessages","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;;AAqBA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,cAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,cAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,qBAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,yBAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAA4B,EAC5BC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,4CAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,2BAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,kBAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,sBAAsBC,mBAAmBjB;IAC/C,MAAMkB,gBAAgBjB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMkB,yBAAyBlB,KAAK,sBAAsB,IAAI,EAAE;IAChE,MAAMmB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGF,cAAc,kBAAkB,EAAEF,oBAAoB,mBAAmB,CAAC;gBACrF;aACD;QACH;WACGG;KACJ;IAED,IAAIE;IAKJ,MAAMC,wBAAwBZ,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMoB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BnB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMoD,eAAerB,oBAAoB,cAAc;IACvD,MAAMsB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIrB,oBAAoB,sBAAsB,EAAE;QAC9CkB,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGlB,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEsB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEkB,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKR;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACkB;IAG3BlB,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMuB,aAAavB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAM0B,OAAqC;QACzC;YAAE,MAAM;YAAU,SAAShB;QAAa;WACrCS;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMvB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI+B;IACJ,IAAI;QACF,IAAI;YACFA,aAAa3D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAM4B,QAAQ,MAAMF,OAAOJ,MAAMvB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACA2B,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAa3D,yBAAyBuD,aAAavB,QAAQ,UAAU;QACvE;QAEA,IAAI2B,WAAW,MAAM,IAAIA,AAA+B7C,WAA/B6C,WAAW,eAAe,EAAgB;YACjE7D,QACE;YAEF6D,WAAW,eAAe,GAAG7C;YAC7B6C,WAAW,eAAe,GAAG7C;QAC/B;QAEA,MAAM+C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B7C,WAA/B6C,WAAW,eAAe,EAAgB;YAC5C/D,MAAM;YACNkE,yBAAyB;YAEzB,IAAIzB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAMiC,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,uBAAuBH,SAASjC,KAAK,WAAW;YAC1DkC;QACF;QAEAG,OAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACxC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM6C,sBAAsBtC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BsE;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENtE,MAAM,gBAAgBuE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAejD,OAAO,KAAK,CAACgD,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAAC1C,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAO0C,cAETjD,OAAO,KAAK,CAACgD,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,OACE7B,qBACA;oBAEFf,OAAO,KAAK,CAACgD,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkBlC,oBAAoB,6BAA6B,CACjEkC,cACA;4BACE,cAAc9B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAIsB,WAAW,cAAc,EAAE,QAC7B7B,oBAAoB,aAAa,CAAC6B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD7B,oBAAoB,mBAAmB,CAACyC;YAI5C,IAAIZ,WAAW,GAAG,EAChB7B,oBAAoB,gBAAgB,CAAC6B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB7B,oBAAoB,mBAAmB,CAAC6B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB7B,oBAAoB,YAAY,CAAC6B,WAAW,MAAM;QAGpD7B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMyB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB/C,QAAQ+C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
@@ -1,3 +1,4 @@
1
+ import { userPromptToString } from "../../../common.mjs";
1
2
  import { getDebug } from "@midscene/shared/logger";
2
3
  import { AIResponseParseError, callAIWithStringResponse } from "../../service-caller/index.mjs";
3
4
  import { transformAutoGLMAction } from "./actions.mjs";
@@ -7,15 +8,17 @@ async function autoGlmPlanning(userInstruction, options, getSystemPrompt) {
7
8
  const { conversationHistory, context, actionContext } = options;
8
9
  const systemPrompt = getSystemPrompt() + (actionContext ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>` : '');
9
10
  const imagePayloadBase64 = context.screenshot.base64;
10
- conversationHistory.append({
11
+ const userInstructionText = userPromptToString(userInstruction);
12
+ const referenceImageMessages = options.referenceImageMessages ?? [];
13
+ const userInstructionMessage = {
11
14
  role: 'user',
12
15
  content: [
13
16
  {
14
17
  type: 'text',
15
- text: userInstruction
18
+ text: userInstructionText
16
19
  }
17
20
  ]
18
- });
21
+ };
19
22
  conversationHistory.append({
20
23
  role: 'user',
21
24
  content: [
@@ -32,6 +35,8 @@ async function autoGlmPlanning(userInstruction, options, getSystemPrompt) {
32
35
  role: 'system',
33
36
  content: systemPrompt
34
37
  },
38
+ userInstructionMessage,
39
+ ...referenceImageMessages,
35
40
  ...conversationHistory.snapshot(1)
36
41
  ];
37
42
  const { content: rawResponse, usage } = await callAIWithStringResponse(msgs, options.modelRuntime, {
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/models/auto-glm/planning.mjs","sources":["../../../../../src/ai-model/models/auto-glm/planning.ts"],"sourcesContent":["import type { PlanningAIResponse } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport type { PlanOptions } from '../../workflows/planning/types';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGlmPlanning(\n userInstruction: string,\n options: PlanOptions,\n getSystemPrompt: () => string,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, actionContext } = options;\n\n const systemPrompt =\n getSystemPrompt() +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'text', text: userInstruction }],\n });\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n options.modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(\n parsedAction,\n context.shotSize,\n options.actionSpace,\n );\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n };\n}\n"],"names":["debug","getDebug","autoGlmPlanning","userInstruction","options","getSystemPrompt","conversationHistory","context","actionContext","systemPrompt","imagePayloadBase64","msgs","rawResponse","usage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;;AAWA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,gBACpBC,eAAuB,EACvBC,OAAoB,EACpBC,eAA6B;IAE7B,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,aAAa,EAAE,GAAGJ;IAExD,MAAMK,eACJJ,oBACCG,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAME,qBAAqBH,QAAQ,UAAU,CAAC,MAAM;IAEpDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMH;YAAgB;SAAE;IACpD;IACAG,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKI;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMC,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASF;QAAa;WACrCH,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EAAE,SAASM,WAAW,EAAEC,KAAK,EAAE,GAAG,MAAMC,yBAC5CH,MACAP,QAAQ,YAAY,EACpB;QACE,aAAaA,QAAQ,WAAW;IAClC;IAGFJ,MAAM,gCAAgCY;IAEtC,IAAIG;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,qBAAqBL;QACtCZ,MAAM,yBAAyBe,eAAe,KAAK;QACnDf,MAAM,uBAAuBe,eAAe,OAAO;QAEnD,MAAMG,eAAeC,YAAYJ;QACjCf,MAAM,yBAAyBkB;QAC/BF,qBAAqBI,uBACnBF,cACAX,QAAQ,QAAQ,EAChBH,QAAQ,WAAW;QAErBJ,MAAM,wBAAwBgB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACd,aAAae,QAAW,IACvCd;IAEJ;IAEAP,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAES,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKJ;QACLC;QACAe;QACA,aAAaF,KAAK,SAAS,CAACd,aAAae,QAAW;IACtD;AACF"}
1
+ {"version":3,"file":"ai-model/models/auto-glm/planning.mjs","sources":["../../../../../src/ai-model/models/auto-glm/planning.ts"],"sourcesContent":["import { type TUserPrompt, userPromptToString } from '@/common';\nimport type { PlanningAIResponse } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport type { PlanOptions } from '../../workflows/planning/types';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGlmPlanning(\n userInstruction: TUserPrompt,\n options: PlanOptions,\n getSystemPrompt: () => string,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, actionContext } = options;\n\n const systemPrompt =\n getSystemPrompt() +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n const userInstructionText = userPromptToString(userInstruction);\n const referenceImageMessages = options.referenceImageMessages ?? [];\n\n const userInstructionMessage: ChatCompletionMessageParam = {\n role: 'user',\n content: [{ type: 'text', text: userInstructionText }],\n };\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n userInstructionMessage,\n ...referenceImageMessages,\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n options.modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(\n parsedAction,\n context.shotSize,\n options.actionSpace,\n );\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n };\n}\n"],"names":["debug","getDebug","autoGlmPlanning","userInstruction","options","getSystemPrompt","conversationHistory","context","actionContext","systemPrompt","imagePayloadBase64","userInstructionText","userPromptToString","referenceImageMessages","userInstructionMessage","msgs","rawResponse","usage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;;;AAYA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,gBACpBC,eAA4B,EAC5BC,OAAoB,EACpBC,eAA6B;IAE7B,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,aAAa,EAAE,GAAGJ;IAExD,MAAMK,eACJJ,oBACCG,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAME,qBAAqBH,QAAQ,UAAU,CAAC,MAAM;IACpD,MAAMI,sBAAsBC,mBAAmBT;IAC/C,MAAMU,yBAAyBT,QAAQ,sBAAsB,IAAI,EAAE;IAEnE,MAAMU,yBAAqD;QACzD,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMH;YAAoB;SAAE;IACxD;IACAL,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKI;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMK,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASN;QAAa;QACxCK;WACGD;WACAP,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EAAE,SAASU,WAAW,EAAEC,KAAK,EAAE,GAAG,MAAMC,yBAC5CH,MACAX,QAAQ,YAAY,EACpB;QACE,aAAaA,QAAQ,WAAW;IAClC;IAGFJ,MAAM,gCAAgCgB;IAEtC,IAAIG;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,qBAAqBL;QACtChB,MAAM,yBAAyBmB,eAAe,KAAK;QACnDnB,MAAM,uBAAuBmB,eAAe,OAAO;QAEnD,MAAMG,eAAeC,YAAYJ;QACjCnB,MAAM,yBAAyBsB;QAC/BF,qBAAqBI,uBACnBF,cACAf,QAAQ,QAAQ,EAChBH,QAAQ,WAAW;QAErBJ,MAAM,wBAAwBoB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACd,aAAae,QAAW,IACvCd;IAEJ;IAEAX,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAEa,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKJ;QACLC;QACAe;QACA,aAAaF,KAAK,SAAS,CAACd,aAAae,QAAW;IACtD;AACF"}
@@ -1,3 +1,4 @@
1
+ import { userPromptToString } from "../../../common.mjs";
1
2
  import { getDebug } from "@midscene/shared/logger";
2
3
  import { transformHotkeyInput } from "@midscene/shared/us-keyboard-layout";
3
4
  import { assert } from "@midscene/shared/utils";
@@ -29,10 +30,12 @@ function pointToLocateParam(point, thought, size) {
29
30
  }
30
31
  async function uiTarsPlanning(userInstruction, options, uiTarsModelVersion) {
31
32
  const { conversationHistory, context, modelRuntime, actionContext } = options;
32
- let instruction = userInstruction;
33
- if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstruction}</user_instruction>`;
33
+ const userInstructionText = userPromptToString(userInstruction);
34
+ let instruction = userInstructionText;
35
+ if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstructionText}</user_instruction>`;
34
36
  const systemPrompt = getUiTarsPlanningPrompt() + instruction;
35
37
  const screenshotBase64 = context.screenshot.base64;
38
+ const referenceImageMessages = options.referenceImageMessages ?? [];
36
39
  conversationHistory.append({
37
40
  role: 'user',
38
41
  content: [
@@ -49,6 +52,7 @@ async function uiTarsPlanning(userInstruction, options, uiTarsModelVersion) {
49
52
  role: 'user',
50
53
  content: systemPrompt
51
54
  },
55
+ ...referenceImageMessages,
52
56
  ...conversationHistory.snapshot()
53
57
  ], modelRuntime, {
54
58
  abortSignal: options.abortSignal
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/models/ui-tars/planning.mjs","sources":["../../../../../src/ai-model/models/ui-tars/planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n Size,\n} from '@/types';\nimport type { UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport {\n getSummary,\n getUiTarsPlanningPrompt,\n} from '../../prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\nimport type { PlanOptions } from '../../workflows/planning/types';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst warnLog = getDebug('ui-tars-planning', { console: true });\n\nfunction pointToLocateParam(\n point: [number, number],\n thought: string | null,\n size: Size,\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: point },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: 1 },\n );\n\n return {\n prompt: thought || '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, point, ctx),\n };\n}\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: PlanOptions,\n uiTarsModelVersion: UITarsModelVersion,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelRuntime, actionContext } = options;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { shotSize } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: shotSize.width,\n height: shotSize.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { shotSize } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box);\n const endPoint = getPoint(action.action_inputs.end_box);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: pointToLocateParam(startPoint, action.thought, shotSize),\n to: pointToLocateParam(endPoint, action.thought, shotSize),\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.action_inputs.content || action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n warnLog('No key found in action: hotkey. Will not perform action.');\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove common model wrappers before handing the response to UI-TARS parser.\n const cleanedText = text\n .replace(/\\[EOS\\]/g, '')\n .replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string): [number, number] {\n const [x, y] = JSON.parse(startBox);\n assert(\n typeof x === 'number' &&\n Number.isFinite(x) &&\n typeof y === 'number' &&\n Number.isFinite(y),\n `invalid point data for ui-tars planning: ${startBox}`,\n );\n return [x, y];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: {\n content?: string;\n };\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["debug","getDebug","warnLog","pointToLocateParam","point","thought","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","uiTarsPlanning","userInstruction","options","uiTarsModelVersion","conversationHistory","context","modelRuntime","actionContext","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","shotSize","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","Math","y","cleanedText","startBox"],"mappings":";;;;;;;;AAkCA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,oBAAoB;IAAE,SAAS;AAAK;AAE7D,SAASE,mBACPC,KAAuB,EACvBC,OAAsB,EACtBC,IAAU;IAEV,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,wCAChB;QAAE,MAAM;QAAS,aAAaL;IAAM,GACpCG,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAc;IAAE;IAGjD,OAAO;QACL,QAAQF,WAAW;QACnB,kBAAkBK,kBAAkBF,WAAWJ,OAAOG;IACxD;AACF;AAEO,eAAeI,eACpBC,eAAuB,EACvBC,OAAoB,EACpBC,kBAAsC;IAEtC,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,YAAY,EAAEC,aAAa,EAAE,GAAGL;IAEtE,IAAIM,cAAcP;IAClB,IAAIM,eACFC,cAAc,CAAC,yBAAyB,EAAED,cAAc,8CAA8C,EAAEN,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,4BAA4BF;IAEjD,MAAMG,mBAAmBN,QAAQ,UAAU,CAAC,MAAM;IAElDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKO;gBACP;YACF;SACD;IACH;IAEA,MAAMC,MAAM,MAAMC,yBAChB;QACE;YACE,MAAM;YACN,SAASJ;QACX;WACGL,oBAAoB,QAAQ;KAChC,EACDE,cACA;QACE,aAAaJ,QAAQ,WAAW;IAClC;IAGF,IAAIY;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,QAAQ,EAAE,GAAGZ;QACrB,MAAMa,cAAcC,aAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,SAAS,KAAK;gBACrB,QAAQA,SAAS,MAAM;YACzB;YACA,UAAUd;QACZ;QACAY,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,QAAQ,EAAE,GAAGZ;IAErBhB,MACE,oBACAc,oBACA,YACAsB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,OAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS;YAC1D,MAAMM,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO;YACtDH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAMnC,mBAAmB2C,YAAYL,OAAO,OAAO,EAAEb;oBACrD,IAAIzB,mBAAmB4C,UAAUN,OAAO,OAAO,EAAEb;gBACnD;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,aAAa,CAAC,OAAO,IAAIA,OAAO,OAAO,IAAI;YAC7D;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAEtB;YACL,MAAMO,OAAOC,qBAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAXEvC,QAAQ;aAYL,IAAIwC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAzC,MAAM,0BAA0B0C,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIxB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBwB,aAAa,IAAI,CAAC;YAGlB,IACE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB2B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMnB,eAAe;YACnB;eACGkB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIf,qBACRH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEAvB,MAAM,oBAAoBoC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMe,MAAMC,WAAW/B,IAAI,OAAO;IAElCR,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASsC;IACX;IAEA,OAAO;QACL,SAASf;QACTe;QACA,OAAO9B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIC,KAAK,KAAK,CAAEN,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMI,IAAID,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEE,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAchB,KACjB,OAAO,CAAC,YAAY,IACpB,OAAO,CAAC,2BAA2B;IACtC,OAAOgB,YAAY,OAAO,CAACf,SAASC,cAAc,IAAI;AACxD;AAEA,SAASb,SAAS4B,QAAgB;IAChC,MAAM,CAACJ,GAAGE,EAAE,GAAGlC,KAAK,KAAK,CAACoC;IAC1B7B,OACE,AAAa,YAAb,OAAOyB,KACLJ,OAAO,QAAQ,CAACI,MAChB,AAAa,YAAb,OAAOE,KACPN,OAAO,QAAQ,CAACM,IAClB,CAAC,yCAAyC,EAAEE,UAAU;IAExD,OAAO;QAACJ;QAAGE;KAAE;AACf"}
1
+ {"version":3,"file":"ai-model/models/ui-tars/planning.mjs","sources":["../../../../../src/ai-model/models/ui-tars/planning.ts"],"sourcesContent":["import { type TUserPrompt, userPromptToString } from '@/common';\nimport type {\n PlanningAIResponse,\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n Size,\n} from '@/types';\nimport type { UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport {\n getSummary,\n getUiTarsPlanningPrompt,\n} from '../../prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\nimport type { PlanOptions } from '../../workflows/planning/types';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst warnLog = getDebug('ui-tars-planning', { console: true });\n\nfunction pointToLocateParam(\n point: [number, number],\n thought: string | null,\n size: Size,\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: point },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: 1 },\n );\n\n return {\n prompt: thought || '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, point, ctx),\n };\n}\n\nexport async function uiTarsPlanning(\n userInstruction: TUserPrompt,\n options: PlanOptions,\n uiTarsModelVersion: UITarsModelVersion,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelRuntime, actionContext } = options;\n\n const userInstructionText = userPromptToString(userInstruction);\n let instruction = userInstructionText;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstructionText}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n const referenceImageMessages = options.referenceImageMessages ?? [];\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...referenceImageMessages,\n ...conversationHistory.snapshot(),\n ],\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { shotSize } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: shotSize.width,\n height: shotSize.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { shotSize } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box);\n const endPoint = getPoint(action.action_inputs.end_box);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: pointToLocateParam(startPoint, action.thought, shotSize),\n to: pointToLocateParam(endPoint, action.thought, shotSize),\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.action_inputs.content || action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n warnLog('No key found in action: hotkey. Will not perform action.');\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove common model wrappers before handing the response to UI-TARS parser.\n const cleanedText = text\n .replace(/\\[EOS\\]/g, '')\n .replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string): [number, number] {\n const [x, y] = JSON.parse(startBox);\n assert(\n typeof x === 'number' &&\n Number.isFinite(x) &&\n typeof y === 'number' &&\n Number.isFinite(y),\n `invalid point data for ui-tars planning: ${startBox}`,\n );\n return [x, y];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: {\n content?: string;\n };\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["debug","getDebug","warnLog","pointToLocateParam","point","thought","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","uiTarsPlanning","userInstruction","options","uiTarsModelVersion","conversationHistory","context","modelRuntime","actionContext","userInstructionText","userPromptToString","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","referenceImageMessages","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","shotSize","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","Math","y","cleanedText","startBox"],"mappings":";;;;;;;;;AAmCA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,oBAAoB;IAAE,SAAS;AAAK;AAE7D,SAASE,mBACPC,KAAuB,EACvBC,OAAsB,EACtBC,IAAU;IAEV,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,wCAChB;QAAE,MAAM;QAAS,aAAaL;IAAM,GACpCG,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAc;IAAE;IAGjD,OAAO;QACL,QAAQF,WAAW;QACnB,kBAAkBK,kBAAkBF,WAAWJ,OAAOG;IACxD;AACF;AAEO,eAAeI,eACpBC,eAA4B,EAC5BC,OAAoB,EACpBC,kBAAsC;IAEtC,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,YAAY,EAAEC,aAAa,EAAE,GAAGL;IAEtE,MAAMM,sBAAsBC,mBAAmBR;IAC/C,IAAIS,cAAcF;IAClB,IAAID,eACFG,cAAc,CAAC,yBAAyB,EAAEH,cAAc,8CAA8C,EAAEC,oBAAoB,mBAAmB,CAAC;IAGlJ,MAAMG,eAAeC,4BAA4BF;IAEjD,MAAMG,mBAAmBR,QAAQ,UAAU,CAAC,MAAM;IAClD,MAAMS,yBAAyBZ,QAAQ,sBAAsB,IAAI,EAAE;IAEnEE,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKS;gBACP;YACF;SACD;IACH;IAEA,MAAME,MAAM,MAAMC,yBAChB;QACE;YACE,MAAM;YACN,SAASL;QACX;WACGG;WACAV,oBAAoB,QAAQ;KAChC,EACDE,cACA;QACE,aAAaJ,QAAQ,WAAW;IAClC;IAGF,IAAIe;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,QAAQ,EAAE,GAAGf;QACrB,MAAMgB,cAAcC,aAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,SAAS,KAAK;gBACrB,QAAQA,SAAS,MAAM;YACzB;YACA,UAAUjB;QACZ;QACAe,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,QAAQ,EAAE,GAAGf;IAErBhB,MACE,oBACAc,oBACA,YACAyB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxC,QAAQ2C,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS7C,mBAAmBC,OAAOwC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxC,QAAQ2C,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS7C,mBAAmBC,OAAOwC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxC,QAAQ2C,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS7C,mBAAmBC,OAAOwC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,OAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS;YAC1D,MAAMM,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO;YACtDH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAMtC,mBAAmB8C,YAAYL,OAAO,OAAO,EAAEb;oBACrD,IAAI5B,mBAAmB+C,UAAUN,OAAO,OAAO,EAAEb;gBACnD;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,aAAa,CAAC,OAAO,IAAIA,OAAO,OAAO,IAAI;YAC7D;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAEtB;YACL,MAAMO,OAAOC,qBAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAXE1C,QAAQ;aAYL,IAAI2C,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACA5C,MAAM,0BAA0B6C,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIxB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBwB,aAAa,IAAI,CAAC;YAGlB,IACE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB2B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMnB,eAAe;YACnB;eACGkB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIf,qBACRH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA1B,MAAM,oBAAoBuC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMe,MAAMC,WAAW/B,IAAI,OAAO;IAElCX,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASyC;IACX;IAEA,OAAO;QACL,SAASf;QACTe;QACA,OAAO9B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIC,KAAK,KAAK,CAAEN,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMI,IAAID,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEE,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAchB,KACjB,OAAO,CAAC,YAAY,IACpB,OAAO,CAAC,2BAA2B;IACtC,OAAOgB,YAAY,OAAO,CAACf,SAASC,cAAc,IAAI;AACxD;AAEA,SAASb,SAAS4B,QAAgB;IAChC,MAAM,CAACJ,GAAGE,EAAE,GAAGlC,KAAK,KAAK,CAACoC;IAC1B7B,OACE,AAAa,YAAb,OAAOyB,KACLJ,OAAO,QAAQ,CAACI,MAChB,AAAa,YAAb,OAAOE,KACPN,OAAO,QAAQ,CAACM,IAClB,CAAC,yCAAyC,EAAEE,UAAU;IAExD,OAAO;QAACJ;QAAGE;KAAE;AACf"}
@@ -1,7 +1,7 @@
1
- import { assert, isPlainObject } from "@midscene/shared/utils";
2
1
  import { NodeType } from "@midscene/shared/constants";
3
2
  import { treeToList } from "@midscene/shared/extractor";
4
- import { compositeElementInfoImg } from "@midscene/shared/img";
3
+ import { compositeElementInfoImg, preProcessImageUrl } from "@midscene/shared/img";
4
+ import { assert, isPlainObject } from "@midscene/shared/utils";
5
5
  import { z } from "zod";
6
6
  function expandSearchArea(rect, screenSize) {
7
7
  const minArea = 160000;
@@ -93,6 +93,53 @@ const TUserPromptSchema = z.union([
93
93
  prompt: z.string()
94
94
  }).and(TMultimodalPromptSchema.partial())
95
95
  ]);
96
+ const userPromptToString = (prompt)=>'string' == typeof prompt ? prompt : prompt.prompt;
97
+ const userPromptToMultimodalPrompt = (prompt)=>{
98
+ if ('string' == typeof prompt || !prompt.images) return;
99
+ return {
100
+ images: prompt.images,
101
+ convertHttpImage2Base64: !!prompt.convertHttpImage2Base64
102
+ };
103
+ };
104
+ const multimodalPromptToChatMessages = async (multimodalPrompt)=>{
105
+ const msgs = [];
106
+ if (multimodalPrompt?.images?.length) {
107
+ msgs.push({
108
+ role: 'user',
109
+ content: [
110
+ {
111
+ type: 'text',
112
+ text: 'Next, I will provide all the reference images. These reference images are supporting context only, not the current screenshot being evaluated, unless the task explicitly asks for comparison or matching.'
113
+ }
114
+ ]
115
+ });
116
+ for (const item of multimodalPrompt.images){
117
+ const imagePayload = await preProcessImageUrl(item.url, !!multimodalPrompt.convertHttpImage2Base64);
118
+ msgs.push({
119
+ role: 'user',
120
+ content: [
121
+ {
122
+ type: 'text',
123
+ text: `this is the reference image named '${item.name}'. It is a reference image, not the current screenshot:`
124
+ }
125
+ ]
126
+ });
127
+ msgs.push({
128
+ role: 'user',
129
+ content: [
130
+ {
131
+ type: 'image_url',
132
+ image_url: {
133
+ url: imagePayload,
134
+ detail: 'high'
135
+ }
136
+ }
137
+ ]
138
+ });
139
+ }
140
+ }
141
+ return msgs;
142
+ };
96
143
  const locateFieldFlagName = 'midscene_location_field_flag';
97
144
  const MidsceneLocationInput = z.object({
98
145
  prompt: TUserPromptSchema,
@@ -212,6 +259,6 @@ const getReadableTimeString = (format = 'YYYY-MM-DD HH:mm:ss', timestamp)=>{
212
259
  const timeString = format.replace('YYYY', String(year)).replace('MM', month).replace('DD', day).replace('HH', hours).replace('mm', minutes).replace('ss', seconds);
213
260
  return `${timeString} (${format})`;
214
261
  };
215
- export { PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, buildYamlFlowFromPlans, dumpActionParam, dumpMidsceneLocatorField, expandSearchArea, finalizeActionName, findAllMidsceneLocatorField, getMidsceneLocationSchema, getReadableTimeString, ifMidsceneLocatorField, markupImageForLLM, parseActionParam };
262
+ export { PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, buildYamlFlowFromPlans, dumpActionParam, dumpMidsceneLocatorField, expandSearchArea, finalizeActionName, findAllMidsceneLocatorField, getMidsceneLocationSchema, getReadableTimeString, ifMidsceneLocatorField, markupImageForLLM, multimodalPromptToChatMessages, parseActionParam, userPromptToMultimodalPrompt, userPromptToString };
216
263
 
217
264
  //# sourceMappingURL=common.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"common.mjs","sources":["../../src/common.ts"],"sourcesContent":["import type {\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert, isPlainObject } from '@midscene/shared/utils';\n\nimport { NodeType } from '@midscene/shared/constants';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { z } from 'zod';\n\n/**\n * Expand the search area to at least 400 x 400 pixels\n *\n * Step 1: Extend 100px on each side (top, right, bottom, left)\n * - If the element is near a boundary, expansion on that side will be limited\n * - No compensation is made for boundary limitations (this is intentional)\n *\n * Step 2: Ensure the area is at least 400x400 pixels\n * - Scale up proportionally from the center if needed\n * - Final result is clamped to screen boundaries\n */\nexport function expandSearchArea(rect: Rect, screenSize: Size): Rect {\n const minArea = 400 * 400;\n const expandSize = 100;\n\n // Step 1: Extend each side by expandSize (100px), clamped to screen boundaries\n // Note: If element is near boundary, actual expansion may be less than 100px on that side\n const expandedLeft = Math.max(rect.left - expandSize, 0);\n const expandedTop = Math.max(rect.top - expandSize, 0);\n\n const expandRect = {\n left: expandedLeft,\n top: expandedTop,\n width: Math.min(\n rect.left - expandedLeft + rect.width + expandSize,\n screenSize.width - expandedLeft,\n ),\n height: Math.min(\n rect.top - expandedTop + rect.height + expandSize,\n screenSize.height - expandedTop,\n ),\n };\n\n // Step 2: Check if area is already >= 400x400\n const currentArea = expandRect.width * expandRect.height;\n\n if (currentArea >= minArea) {\n return expandRect;\n }\n\n // Step 2: Scale up from center to reach minimum 400x400 area\n const centerX = expandRect.left + expandRect.width / 2;\n const centerY = expandRect.top + expandRect.height / 2;\n\n // Calculate scale factor needed to reach minimum area\n const scaleFactor = Math.sqrt(minArea / currentArea);\n const newWidth = Math.round(expandRect.width * scaleFactor);\n const newHeight = Math.round(expandRect.height * scaleFactor);\n\n // Calculate new position based on center point\n const newLeft = Math.round(centerX - newWidth / 2);\n const newTop = Math.round(centerY - newHeight / 2);\n\n // Clamp to screen boundaries\n const left = Math.max(newLeft, 0);\n const top = Math.max(newTop, 0);\n\n return {\n left,\n top,\n width: Math.min(newWidth, screenSize.width - left),\n height: Math.min(newHeight, screenSize.height - top),\n };\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n // For actions whose param is a single string field (e.g. Launch/Terminate's\n // `uri`, RunAdbShell's `command`), inline the value on the flowKey. Writing\n // `{ terminate: '', uri: '...' }` makes the YAML player treat the empty\n // string as the param and drop the sibling `uri`, so cache replay would\n // call the action with an empty argument.\n const shortcutField =\n action.name === 'Launch' || action.interfaceAlias === 'launch'\n ? 'uri'\n : action.name === 'Terminate' || action.interfaceAlias === 'terminate'\n ? 'uri'\n : action.name === 'RunAdbShell' ||\n action.interfaceAlias === 'runAdbShell' ||\n action.name === 'RunHdcShell' ||\n action.interfaceAlias === 'runHdcShell'\n ? 'command'\n : undefined;\n const shortcutKeys = shortcutField ? Object.keys(flowParam) : [];\n const canInlineShortcut =\n shortcutField &&\n shortcutKeys.length === 1 &&\n shortcutKeys[0] === shortcutField &&\n typeof flowParam[shortcutField] === 'string';\n\n const flowItem: MidsceneYamlFlowItem = canInlineShortcut\n ? { [flowKey]: flowParam[shortcutField as string] }\n : { [flowKey]: '', ...flowParam };\n\n flow.push(flowItem);\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\n// Schema for locator field input (when users provide locate parameters)\nconst MidsceneLocationInput = z\n .object({\n prompt: TUserPromptSchema,\n deepLocate: z.boolean().optional(),\n deepThink: z\n .boolean()\n .optional()\n .describe('@deprecated Use `deepLocate` instead.'),\n cacheable: z.boolean().optional(),\n xpath: z.union([z.string(), z.boolean()]).optional(),\n })\n .passthrough();\n\n/**\n * Returns the schema for locator fields.\n * This now returns the input schema which is more permissive and suitable for validation.\n */\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationInput;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodObject\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n\n // Method 1: Check for the location field flag (for result schema)\n if (locateFieldFlagName in shape) {\n return true;\n }\n\n // Method 2: Check if it's the input schema by checking for 'prompt' field\n // Input schema has 'prompt' as a required field\n if ('prompt' in shape && shape.prompt) {\n return true;\n }\n }\n\n return false;\n};\n\nconst formatPromptWithImages = (\n promptObj: Exclude<TUserPrompt, string>,\n): string => {\n let promptString = promptObj.prompt;\n if (Array.isArray(promptObj.images) && promptObj.images.length > 0) {\n const imageCount = promptObj.images.length;\n promptString += ` (with ${imageCount} image${imageCount > 1 ? 's' : ''})`;\n }\n return promptString;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return formatPromptWithImages(field.prompt);\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n // Prevent spreading strings into {0: 'c', 1: 'o', ...}\n if (!isPlainObject(jsonObject)) {\n return {};\n }\n\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = formatPromptWithImages(fieldValue.prompt);\n }\n }\n }\n }\n }\n\n return result;\n};\n\n/**\n * Parse and validate action parameters using Zod schema.\n * All fields are validated through Zod, EXCEPT locator fields which are skipped.\n * Default values defined in the schema are automatically applied.\n *\n * Locator fields are special business logic fields with complex validation requirements,\n * so they are intentionally excluded from Zod parsing and use existing validation logic.\n *\n * When shrunkShotToLogicalRatio is provided and !== 1, coordinates in locate fields\n * are transformed from screenshot space to logical space.\n */\nexport const parseActionParam = (\n rawParam: Record<string, any> | undefined,\n zodSchema?: z.ZodType<any>,\n options?: { shrunkShotToLogicalRatio?: number },\n): Record<string, any> | undefined => {\n // If no schema is provided, return undefined (action takes no parameters)\n if (!zodSchema) {\n return undefined;\n }\n\n // Handle undefined or null rawParam by providing an empty object\n const param = rawParam ?? {};\n\n // Find all locate fields in the schema\n const locateFields = findAllMidsceneLocatorField(zodSchema);\n\n // If there are no locate fields, just do normal validation\n if (locateFields.length === 0) {\n return zodSchema.parse(param);\n }\n\n // Extract locate field values to restore later\n const locateFieldValues: Record<string, any> = {};\n for (const fieldName of locateFields) {\n if (fieldName in param) {\n locateFieldValues[fieldName] = param[fieldName];\n }\n }\n\n // Build params for validation - skip locate fields and use dummy values\n const paramsForValidation: Record<string, any> = {};\n for (const key in param) {\n if (locateFields.includes(key)) {\n // Use dummy value to satisfy schema validation\n paramsForValidation[key] = { prompt: '_dummy_' };\n } else {\n paramsForValidation[key] = param[key];\n }\n }\n\n // Validate with dummy locate values\n const validated = zodSchema.parse(paramsForValidation);\n\n // Restore the actual locate field values (unvalidated, as per business requirement),\n // and transform coordinates from screenshot space to logical space if needed\n const ratio = options?.shrunkShotToLogicalRatio;\n for (const fieldName in locateFieldValues) {\n let value = locateFieldValues[fieldName];\n if (\n ratio !== undefined &&\n ratio !== 1 &&\n value &&\n typeof value === 'object' &&\n value.center &&\n value.rect\n ) {\n value = {\n ...value,\n center: [\n Math.round(value.center[0] / ratio),\n Math.round(value.center[1] / ratio),\n ],\n rect: {\n ...value.rect,\n left: Math.round(value.rect.left / ratio),\n top: Math.round(value.rect.top / ratio),\n width: Math.round(value.rect.width / ratio),\n height: Math.round(value.rect.height / ratio),\n },\n };\n }\n validated[fieldName] = value;\n }\n\n return validated;\n};\n\nexport const finalizeActionName = 'Finalize';\n\n/**\n * Get a readable time string for a given timestamp or the current time\n * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'\n * @param timestamp - Optional timestamp in milliseconds. If not provided, uses current system time.\n * @returns A formatted time string with format label\n */\nexport const getReadableTimeString = (\n format = 'YYYY-MM-DD HH:mm:ss',\n timestamp?: number,\n): string => {\n const now = timestamp !== undefined ? new Date(timestamp) : new Date();\n const year = now.getFullYear();\n const month = String(now.getMonth() + 1).padStart(2, '0');\n const day = String(now.getDate()).padStart(2, '0');\n const hours = String(now.getHours()).padStart(2, '0');\n const minutes = String(now.getMinutes()).padStart(2, '0');\n const seconds = String(now.getSeconds()).padStart(2, '0');\n\n const timeString = format\n .replace('YYYY', String(year))\n .replace('MM', month)\n .replace('DD', day)\n .replace('HH', hours)\n .replace('mm', minutes)\n .replace('ss', seconds);\n\n return `${timeString} (${format})`;\n};\n"],"names":["expandSearchArea","rect","screenSize","minArea","expandSize","expandedLeft","Math","expandedTop","expandRect","currentArea","centerX","centerY","scaleFactor","newWidth","newHeight","newLeft","newTop","left","top","markupImageForLLM","screenshotBase64","tree","size","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","flow","plan","verb","action","console","flowKey","flowParam","dumpActionParam","shortcutField","undefined","shortcutKeys","Object","canInlineShortcut","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","MidsceneLocationInput","getMidsceneLocationSchema","ifMidsceneLocatorField","field","actualField","shape","formatPromptWithImages","promptObj","promptString","Array","imageCount","dumpMidsceneLocatorField","assert","String","findAllMidsceneLocatorField","zodType","requiredOnly","zodObject","keys","key","jsonObject","zodSchema","isPlainObject","locatorFields","result","fieldName","fieldValue","parseActionParam","rawParam","options","param","locateFields","locateFieldValues","paramsForValidation","validated","ratio","value","finalizeActionName","getReadableTimeString","format","timestamp","now","Date","year","month","day","hours","minutes","seconds","timeString"],"mappings":";;;;;AA2BO,SAASA,iBAAiBC,IAAU,EAAEC,UAAgB;IAC3D,MAAMC,UAAU;IAChB,MAAMC,aAAa;IAInB,MAAMC,eAAeC,KAAK,GAAG,CAACL,KAAK,IAAI,GAAGG,YAAY;IACtD,MAAMG,cAAcD,KAAK,GAAG,CAACL,KAAK,GAAG,GAAGG,YAAY;IAEpD,MAAMI,aAAa;QACjB,MAAMH;QACN,KAAKE;QACL,OAAOD,KAAK,GAAG,CACbL,KAAK,IAAI,GAAGI,eAAeJ,KAAK,KAAK,GAAGG,YACxCF,WAAW,KAAK,GAAGG;QAErB,QAAQC,KAAK,GAAG,CACdL,KAAK,GAAG,GAAGM,cAAcN,KAAK,MAAM,GAAGG,YACvCF,WAAW,MAAM,GAAGK;IAExB;IAGA,MAAME,cAAcD,WAAW,KAAK,GAAGA,WAAW,MAAM;IAExD,IAAIC,eAAeN,SACjB,OAAOK;IAIT,MAAME,UAAUF,WAAW,IAAI,GAAGA,WAAW,KAAK,GAAG;IACrD,MAAMG,UAAUH,WAAW,GAAG,GAAGA,WAAW,MAAM,GAAG;IAGrD,MAAMI,cAAcN,KAAK,IAAI,CAACH,UAAUM;IACxC,MAAMI,WAAWP,KAAK,KAAK,CAACE,WAAW,KAAK,GAAGI;IAC/C,MAAME,YAAYR,KAAK,KAAK,CAACE,WAAW,MAAM,GAAGI;IAGjD,MAAMG,UAAUT,KAAK,KAAK,CAACI,UAAUG,WAAW;IAChD,MAAMG,SAASV,KAAK,KAAK,CAACK,UAAUG,YAAY;IAGhD,MAAMG,OAAOX,KAAK,GAAG,CAACS,SAAS;IAC/B,MAAMG,MAAMZ,KAAK,GAAG,CAACU,QAAQ;IAE7B,OAAO;QACLC;QACAC;QACA,OAAOZ,KAAK,GAAG,CAACO,UAAUX,WAAW,KAAK,GAAGe;QAC7C,QAAQX,KAAK,GAAG,CAACQ,WAAWZ,WAAW,MAAM,GAAGgB;IAClD;AACF;AAEO,eAAeC,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCC,IAAU;IAEV,MAAMC,eAAeC,WAAWH;IAChC,MAAMI,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBT;QAChB,sBAAsBK;QACtBH;IACF;IACA,OAAOM;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC;IAEhC,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQH,MAAO;QACxB,MAAMI,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASJ,YAAY,IAAI,CAAC,CAACI,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEF,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMG,UAAUF,OAAO,cAAc,IAAID;QACzC,MAAMI,YAAYH,OAAO,WAAW,GAChCI,gBAAgBN,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAOL,MAAMK,gBACJL,AAAgB,aAAhBA,OAAO,IAAI,IAAiBA,AAA0B,aAA1BA,OAAO,cAAc,GAC7C,QACAA,AAAgB,gBAAhBA,OAAO,IAAI,IAAoBA,AAA0B,gBAA1BA,OAAO,cAAc,GAClD,QACAA,AAAgB,kBAAhBA,OAAO,IAAI,IACTA,AAA0B,kBAA1BA,OAAO,cAAc,IACrBA,AAAgB,kBAAhBA,OAAO,IAAI,IACXA,AAA0B,kBAA1BA,OAAO,cAAc,GACrB,YACAM;QACV,MAAMC,eAAeF,gBAAgBG,OAAO,IAAI,CAACL,aAAa,EAAE;QAChE,MAAMM,oBACJJ,iBACAE,AAAwB,MAAxBA,aAAa,MAAM,IACnBA,YAAY,CAAC,EAAE,KAAKF,iBACpB,AAAoC,YAApC,OAAOF,SAAS,CAACE,cAAc;QAEjC,MAAMK,WAAiCD,oBACnC;YAAE,CAACP,QAAQ,EAAEC,SAAS,CAACE,cAAwB;QAAC,IAChD;YAAE,CAACH,QAAQ,EAAE;YAAI,GAAGC,SAAS;QAAC;QAElCN,KAAK,IAAI,CAACa;IACZ;IAEA,OAAOb;AACT;AAGO,MAAMc,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;AAClB;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAG5B,MAAMC,wBAAwBN,EAAAA,MACrB,CAAC;IACN,QAAQI;IACR,YAAYJ,EAAE,OAAO,GAAG,QAAQ;IAChC,WAAWA,EAAAA,OACD,GACP,QAAQ,GACR,QAAQ,CAAC;IACZ,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,OAAO;KAAG,EAAE,QAAQ;AACpD,GACC,WAAW;AAMP,MAAMO,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;IAErC,IAAIC,cAAcD;IAClB,IAAIC,YAAY,IAAI,EAAE,aAAa,eACjCA,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAIA,YAAY,IAAI,EAAE,aAAa,aAAa;QAC9C,MAAMC,QAAQD,YAAY,IAAI,CAAC,KAAK;QAGpC,IAAIL,uBAAuBM,OACzB,OAAO;QAKT,IAAI,YAAYA,SAASA,MAAM,MAAM,EACnC,OAAO;IAEX;IAEA,OAAO;AACT;AAEA,MAAMC,yBAAyB,CAC7BC;IAEA,IAAIC,eAAeD,UAAU,MAAM;IACnC,IAAIE,MAAM,OAAO,CAACF,UAAU,MAAM,KAAKA,UAAU,MAAM,CAAC,MAAM,GAAG,GAAG;QAClE,MAAMG,aAAaH,UAAU,MAAM,CAAC,MAAM;QAC1CC,gBAAgB,CAAC,OAAO,EAAEE,WAAW,MAAM,EAAEA,aAAa,IAAI,MAAM,GAAG,CAAC,CAAC;IAC3E;IACA,OAAOF;AACT;AAEO,MAAMG,2BAA2B,CAACR;IACvCS,OACEV,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOG,uBAAuBH,MAAM,MAAM;IAE9C;IAGA,OAAOU,OAAOV;AAChB;AAEO,MAAMW,8BAA8B,CACzCC,SACAC;IAEA,IAAI,CAACD,SACH,OAAO,EAAE;IAIX,MAAME,YAAYF;IAClB,IAAIE,UAAU,IAAI,EAAE,aAAa,eAAeA,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAO5B,OAAO,IAAI,CAAC2B,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACC;YAClB,MAAMhB,QAAQc,UAAU,KAAK,CAACE,IAAI;YAClC,IAAI,CAACjB,uBAAuBC,QAC1B,OAAO;YAIT,IAAIa,cACF,OAAOb,MAAM,IAAI,EAAE,aAAa;YAGlC,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAMjB,kBAAkB,CAC7BkC,YACAC;IAGA,IAAI,CAACC,cAAcF,aACjB,OAAO,CAAC;IAGV,MAAMG,gBAAgBT,4BAA4BO;IAClD,MAAMG,SAAS;QAAE,GAAGJ,UAAU;IAAC;IAE/B,KAAK,MAAMK,aAAaF,cAAe;QACrC,MAAMG,aAAaF,MAAM,CAACC,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACTF,MAAM,CAACC,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1BF,MAAM,CAACC,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxBF,MAAM,CAACC,UAAU,GAAGnB,uBAAuBoB,WAAW,MAAM;gBAC9D;YACF;QACF;IAEJ;IAEA,OAAOF;AACT;AAaO,MAAMG,mBAAmB,CAC9BC,UACAP,WACAQ;IAGA,IAAI,CAACR,WACH;IAIF,MAAMS,QAAQF,YAAY,CAAC;IAG3B,MAAMG,eAAejB,4BAA4BO;IAGjD,IAAIU,AAAwB,MAAxBA,aAAa,MAAM,EACrB,OAAOV,UAAU,KAAK,CAACS;IAIzB,MAAME,oBAAyC,CAAC;IAChD,KAAK,MAAMP,aAAaM,aACtB,IAAIN,aAAaK,OACfE,iBAAiB,CAACP,UAAU,GAAGK,KAAK,CAACL,UAAU;IAKnD,MAAMQ,sBAA2C,CAAC;IAClD,IAAK,MAAMd,OAAOW,MAChB,IAAIC,aAAa,QAAQ,CAACZ,MAExBc,mBAAmB,CAACd,IAAI,GAAG;QAAE,QAAQ;IAAU;SAE/Cc,mBAAmB,CAACd,IAAI,GAAGW,KAAK,CAACX,IAAI;IAKzC,MAAMe,YAAYb,UAAU,KAAK,CAACY;IAIlC,MAAME,QAAQN,SAAS;IACvB,IAAK,MAAMJ,aAAaO,kBAAmB;QACzC,IAAII,QAAQJ,iBAAiB,CAACP,UAAU;QACxC,IACEU,AAAU/C,WAAV+C,SACAA,AAAU,MAAVA,SACAC,SACA,AAAiB,YAAjB,OAAOA,SACPA,MAAM,MAAM,IACZA,MAAM,IAAI,EAEVA,QAAQ;YACN,GAAGA,KAAK;YACR,QAAQ;gBACNpF,KAAK,KAAK,CAACoF,MAAM,MAAM,CAAC,EAAE,GAAGD;gBAC7BnF,KAAK,KAAK,CAACoF,MAAM,MAAM,CAAC,EAAE,GAAGD;aAC9B;YACD,MAAM;gBACJ,GAAGC,MAAM,IAAI;gBACb,MAAMpF,KAAK,KAAK,CAACoF,MAAM,IAAI,CAAC,IAAI,GAAGD;gBACnC,KAAKnF,KAAK,KAAK,CAACoF,MAAM,IAAI,CAAC,GAAG,GAAGD;gBACjC,OAAOnF,KAAK,KAAK,CAACoF,MAAM,IAAI,CAAC,KAAK,GAAGD;gBACrC,QAAQnF,KAAK,KAAK,CAACoF,MAAM,IAAI,CAAC,MAAM,GAAGD;YACzC;QACF;QAEFD,SAAS,CAACT,UAAU,GAAGW;IACzB;IAEA,OAAOF;AACT;AAEO,MAAMG,qBAAqB;AAQ3B,MAAMC,wBAAwB,CACnCC,SAAS,qBAAqB,EAC9BC;IAEA,MAAMC,MAAMD,AAAcpD,WAAdoD,YAA0B,IAAIE,KAAKF,aAAa,IAAIE;IAChE,MAAMC,OAAOF,IAAI,WAAW;IAC5B,MAAMG,QAAQ/B,OAAO4B,IAAI,QAAQ,KAAK,GAAG,QAAQ,CAAC,GAAG;IACrD,MAAMI,MAAMhC,OAAO4B,IAAI,OAAO,IAAI,QAAQ,CAAC,GAAG;IAC9C,MAAMK,QAAQjC,OAAO4B,IAAI,QAAQ,IAAI,QAAQ,CAAC,GAAG;IACjD,MAAMM,UAAUlC,OAAO4B,IAAI,UAAU,IAAI,QAAQ,CAAC,GAAG;IACrD,MAAMO,UAAUnC,OAAO4B,IAAI,UAAU,IAAI,QAAQ,CAAC,GAAG;IAErD,MAAMQ,aAAaV,OAChB,OAAO,CAAC,QAAQ1B,OAAO8B,OACvB,OAAO,CAAC,MAAMC,OACd,OAAO,CAAC,MAAMC,KACd,OAAO,CAAC,MAAMC,OACd,OAAO,CAAC,MAAMC,SACd,OAAO,CAAC,MAAMC;IAEjB,OAAO,GAAGC,WAAW,EAAE,EAAEV,OAAO,CAAC,CAAC;AACpC"}
1
+ {"version":3,"file":"common.mjs","sources":["../../src/common.ts"],"sourcesContent":["import type {\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { treeToList } from '@midscene/shared/extractor';\nimport {\n compositeElementInfoImg,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { assert, isPlainObject } from '@midscene/shared/utils';\nimport type { ChatCompletionUserMessageParam } from 'openai/resources/index';\nimport { z } from 'zod';\n\n/**\n * Expand the search area to at least 400 x 400 pixels\n *\n * Step 1: Extend 100px on each side (top, right, bottom, left)\n * - If the element is near a boundary, expansion on that side will be limited\n * - No compensation is made for boundary limitations (this is intentional)\n *\n * Step 2: Ensure the area is at least 400x400 pixels\n * - Scale up proportionally from the center if needed\n * - Final result is clamped to screen boundaries\n */\nexport function expandSearchArea(rect: Rect, screenSize: Size): Rect {\n const minArea = 400 * 400;\n const expandSize = 100;\n\n // Step 1: Extend each side by expandSize (100px), clamped to screen boundaries\n // Note: If element is near boundary, actual expansion may be less than 100px on that side\n const expandedLeft = Math.max(rect.left - expandSize, 0);\n const expandedTop = Math.max(rect.top - expandSize, 0);\n\n const expandRect = {\n left: expandedLeft,\n top: expandedTop,\n width: Math.min(\n rect.left - expandedLeft + rect.width + expandSize,\n screenSize.width - expandedLeft,\n ),\n height: Math.min(\n rect.top - expandedTop + rect.height + expandSize,\n screenSize.height - expandedTop,\n ),\n };\n\n // Step 2: Check if area is already >= 400x400\n const currentArea = expandRect.width * expandRect.height;\n\n if (currentArea >= minArea) {\n return expandRect;\n }\n\n // Step 2: Scale up from center to reach minimum 400x400 area\n const centerX = expandRect.left + expandRect.width / 2;\n const centerY = expandRect.top + expandRect.height / 2;\n\n // Calculate scale factor needed to reach minimum area\n const scaleFactor = Math.sqrt(minArea / currentArea);\n const newWidth = Math.round(expandRect.width * scaleFactor);\n const newHeight = Math.round(expandRect.height * scaleFactor);\n\n // Calculate new position based on center point\n const newLeft = Math.round(centerX - newWidth / 2);\n const newTop = Math.round(centerY - newHeight / 2);\n\n // Clamp to screen boundaries\n const left = Math.max(newLeft, 0);\n const top = Math.max(newTop, 0);\n\n return {\n left,\n top,\n width: Math.min(newWidth, screenSize.width - left),\n height: Math.min(newHeight, screenSize.height - top),\n };\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n // For actions whose param is a single string field (e.g. Launch/Terminate's\n // `uri`, RunAdbShell's `command`), inline the value on the flowKey. Writing\n // `{ terminate: '', uri: '...' }` makes the YAML player treat the empty\n // string as the param and drop the sibling `uri`, so cache replay would\n // call the action with an empty argument.\n const shortcutField =\n action.name === 'Launch' || action.interfaceAlias === 'launch'\n ? 'uri'\n : action.name === 'Terminate' || action.interfaceAlias === 'terminate'\n ? 'uri'\n : action.name === 'RunAdbShell' ||\n action.interfaceAlias === 'runAdbShell' ||\n action.name === 'RunHdcShell' ||\n action.interfaceAlias === 'runHdcShell'\n ? 'command'\n : undefined;\n const shortcutKeys = shortcutField ? Object.keys(flowParam) : [];\n const canInlineShortcut =\n shortcutField &&\n shortcutKeys.length === 1 &&\n shortcutKeys[0] === shortcutField &&\n typeof flowParam[shortcutField] === 'string';\n\n const flowItem: MidsceneYamlFlowItem = canInlineShortcut\n ? { [flowKey]: flowParam[shortcutField as string] }\n : { [flowKey]: '', ...flowParam };\n\n flow.push(flowItem);\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nexport const userPromptToString = (prompt: TUserPrompt): string => {\n return typeof prompt === 'string' ? prompt : prompt.prompt;\n};\n\nexport const userPromptToMultimodalPrompt = (\n prompt: TUserPrompt,\n): TMultimodalPrompt | undefined => {\n if (typeof prompt === 'string' || !prompt.images) {\n return undefined;\n }\n return {\n images: prompt.images,\n convertHttpImage2Base64: !!prompt.convertHttpImage2Base64,\n };\n};\n\nexport const multimodalPromptToChatMessages = async (\n multimodalPrompt?: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images. These reference images are supporting context only, not the current screenshot being evaluated, unless the task explicitly asks for comparison or matching.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const imagePayload = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}'. It is a reference image, not the current screenshot:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nconst locateFieldFlagName = 'midscene_location_field_flag';\n\n// Schema for locator field input (when users provide locate parameters)\nconst MidsceneLocationInput = z\n .object({\n prompt: TUserPromptSchema,\n deepLocate: z.boolean().optional(),\n deepThink: z\n .boolean()\n .optional()\n .describe('@deprecated Use `deepLocate` instead.'),\n cacheable: z.boolean().optional(),\n xpath: z.union([z.string(), z.boolean()]).optional(),\n })\n .passthrough();\n\n/**\n * Returns the schema for locator fields.\n * This now returns the input schema which is more permissive and suitable for validation.\n */\nexport const getMidsceneLocationSchema = () => {\n return MidsceneLocationInput;\n};\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodObject\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n\n // Method 1: Check for the location field flag (for result schema)\n if (locateFieldFlagName in shape) {\n return true;\n }\n\n // Method 2: Check if it's the input schema by checking for 'prompt' field\n // Input schema has 'prompt' as a required field\n if ('prompt' in shape && shape.prompt) {\n return true;\n }\n }\n\n return false;\n};\n\nconst formatPromptWithImages = (\n promptObj: Exclude<TUserPrompt, string>,\n): string => {\n let promptString = promptObj.prompt;\n if (Array.isArray(promptObj.images) && promptObj.images.length > 0) {\n const imageCount = promptObj.images.length;\n promptString += ` (with ${imageCount} image${imageCount > 1 ? 's' : ''})`;\n }\n return promptString;\n};\n\nexport const dumpMidsceneLocatorField = (field: any): string => {\n assert(\n ifMidsceneLocatorField(field),\n 'field is not a midscene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return formatPromptWithImages(field.prompt);\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifMidsceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n // Prevent spreading strings into {0: 'c', 1: 'o', ...}\n if (!isPlainObject(jsonObject)) {\n return {};\n }\n\n const locatorFields = findAllMidsceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a MidsceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = formatPromptWithImages(fieldValue.prompt);\n }\n }\n }\n }\n }\n\n return result;\n};\n\n/**\n * Parse and validate action parameters using Zod schema.\n * All fields are validated through Zod, EXCEPT locator fields which are skipped.\n * Default values defined in the schema are automatically applied.\n *\n * Locator fields are special business logic fields with complex validation requirements,\n * so they are intentionally excluded from Zod parsing and use existing validation logic.\n *\n * When shrunkShotToLogicalRatio is provided and !== 1, coordinates in locate fields\n * are transformed from screenshot space to logical space.\n */\nexport const parseActionParam = (\n rawParam: Record<string, any> | undefined,\n zodSchema?: z.ZodType<any>,\n options?: { shrunkShotToLogicalRatio?: number },\n): Record<string, any> | undefined => {\n // If no schema is provided, return undefined (action takes no parameters)\n if (!zodSchema) {\n return undefined;\n }\n\n // Handle undefined or null rawParam by providing an empty object\n const param = rawParam ?? {};\n\n // Find all locate fields in the schema\n const locateFields = findAllMidsceneLocatorField(zodSchema);\n\n // If there are no locate fields, just do normal validation\n if (locateFields.length === 0) {\n return zodSchema.parse(param);\n }\n\n // Extract locate field values to restore later\n const locateFieldValues: Record<string, any> = {};\n for (const fieldName of locateFields) {\n if (fieldName in param) {\n locateFieldValues[fieldName] = param[fieldName];\n }\n }\n\n // Build params for validation - skip locate fields and use dummy values\n const paramsForValidation: Record<string, any> = {};\n for (const key in param) {\n if (locateFields.includes(key)) {\n // Use dummy value to satisfy schema validation\n paramsForValidation[key] = { prompt: '_dummy_' };\n } else {\n paramsForValidation[key] = param[key];\n }\n }\n\n // Validate with dummy locate values\n const validated = zodSchema.parse(paramsForValidation);\n\n // Restore the actual locate field values (unvalidated, as per business requirement),\n // and transform coordinates from screenshot space to logical space if needed\n const ratio = options?.shrunkShotToLogicalRatio;\n for (const fieldName in locateFieldValues) {\n let value = locateFieldValues[fieldName];\n if (\n ratio !== undefined &&\n ratio !== 1 &&\n value &&\n typeof value === 'object' &&\n value.center &&\n value.rect\n ) {\n value = {\n ...value,\n center: [\n Math.round(value.center[0] / ratio),\n Math.round(value.center[1] / ratio),\n ],\n rect: {\n ...value.rect,\n left: Math.round(value.rect.left / ratio),\n top: Math.round(value.rect.top / ratio),\n width: Math.round(value.rect.width / ratio),\n height: Math.round(value.rect.height / ratio),\n },\n };\n }\n validated[fieldName] = value;\n }\n\n return validated;\n};\n\nexport const finalizeActionName = 'Finalize';\n\n/**\n * Get a readable time string for a given timestamp or the current time\n * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'\n * @param timestamp - Optional timestamp in milliseconds. If not provided, uses current system time.\n * @returns A formatted time string with format label\n */\nexport const getReadableTimeString = (\n format = 'YYYY-MM-DD HH:mm:ss',\n timestamp?: number,\n): string => {\n const now = timestamp !== undefined ? new Date(timestamp) : new Date();\n const year = now.getFullYear();\n const month = String(now.getMonth() + 1).padStart(2, '0');\n const day = String(now.getDate()).padStart(2, '0');\n const hours = String(now.getHours()).padStart(2, '0');\n const minutes = String(now.getMinutes()).padStart(2, '0');\n const seconds = String(now.getSeconds()).padStart(2, '0');\n\n const timeString = format\n .replace('YYYY', String(year))\n .replace('MM', month)\n .replace('DD', day)\n .replace('HH', hours)\n .replace('mm', minutes)\n .replace('ss', seconds);\n\n return `${timeString} (${format})`;\n};\n"],"names":["expandSearchArea","rect","screenSize","minArea","expandSize","expandedLeft","Math","expandedTop","expandRect","currentArea","centerX","centerY","scaleFactor","newWidth","newHeight","newLeft","newTop","left","top","markupImageForLLM","screenshotBase64","tree","size","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","flow","plan","verb","action","console","flowKey","flowParam","dumpActionParam","shortcutField","undefined","shortcutKeys","Object","canInlineShortcut","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","userPromptToString","prompt","userPromptToMultimodalPrompt","multimodalPromptToChatMessages","multimodalPrompt","msgs","item","preProcessImageUrl","locateFieldFlagName","MidsceneLocationInput","getMidsceneLocationSchema","ifMidsceneLocatorField","field","actualField","shape","formatPromptWithImages","promptObj","promptString","Array","imageCount","dumpMidsceneLocatorField","assert","String","findAllMidsceneLocatorField","zodType","requiredOnly","zodObject","keys","key","jsonObject","zodSchema","isPlainObject","locatorFields","result","fieldName","fieldValue","parseActionParam","rawParam","options","param","locateFields","locateFieldValues","paramsForValidation","validated","ratio","value","finalizeActionName","getReadableTimeString","format","timestamp","now","Date","year","month","day","hours","minutes","seconds","timeString"],"mappings":";;;;;AA8BO,SAASA,iBAAiBC,IAAU,EAAEC,UAAgB;IAC3D,MAAMC,UAAU;IAChB,MAAMC,aAAa;IAInB,MAAMC,eAAeC,KAAK,GAAG,CAACL,KAAK,IAAI,GAAGG,YAAY;IACtD,MAAMG,cAAcD,KAAK,GAAG,CAACL,KAAK,GAAG,GAAGG,YAAY;IAEpD,MAAMI,aAAa;QACjB,MAAMH;QACN,KAAKE;QACL,OAAOD,KAAK,GAAG,CACbL,KAAK,IAAI,GAAGI,eAAeJ,KAAK,KAAK,GAAGG,YACxCF,WAAW,KAAK,GAAGG;QAErB,QAAQC,KAAK,GAAG,CACdL,KAAK,GAAG,GAAGM,cAAcN,KAAK,MAAM,GAAGG,YACvCF,WAAW,MAAM,GAAGK;IAExB;IAGA,MAAME,cAAcD,WAAW,KAAK,GAAGA,WAAW,MAAM;IAExD,IAAIC,eAAeN,SACjB,OAAOK;IAIT,MAAME,UAAUF,WAAW,IAAI,GAAGA,WAAW,KAAK,GAAG;IACrD,MAAMG,UAAUH,WAAW,GAAG,GAAGA,WAAW,MAAM,GAAG;IAGrD,MAAMI,cAAcN,KAAK,IAAI,CAACH,UAAUM;IACxC,MAAMI,WAAWP,KAAK,KAAK,CAACE,WAAW,KAAK,GAAGI;IAC/C,MAAME,YAAYR,KAAK,KAAK,CAACE,WAAW,MAAM,GAAGI;IAGjD,MAAMG,UAAUT,KAAK,KAAK,CAACI,UAAUG,WAAW;IAChD,MAAMG,SAASV,KAAK,KAAK,CAACK,UAAUG,YAAY;IAGhD,MAAMG,OAAOX,KAAK,GAAG,CAACS,SAAS;IAC/B,MAAMG,MAAMZ,KAAK,GAAG,CAACU,QAAQ;IAE7B,OAAO;QACLC;QACAC;QACA,OAAOZ,KAAK,GAAG,CAACO,UAAUX,WAAW,KAAK,GAAGe;QAC7C,QAAQX,KAAK,GAAG,CAACQ,WAAWZ,WAAW,MAAM,GAAGgB;IAClD;AACF;AAEO,eAAeC,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCC,IAAU;IAEV,MAAMC,eAAeC,WAAWH;IAChC,MAAMI,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBT;QAChB,sBAAsBK;QACtBH;IACF;IACA,OAAOM;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC;IAEhC,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQH,MAAO;QACxB,MAAMI,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASJ,YAAY,IAAI,CAAC,CAACI,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEF,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMG,UAAUF,OAAO,cAAc,IAAID;QACzC,MAAMI,YAAYH,OAAO,WAAW,GAChCI,gBAAgBN,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAOL,MAAMK,gBACJL,AAAgB,aAAhBA,OAAO,IAAI,IAAiBA,AAA0B,aAA1BA,OAAO,cAAc,GAC7C,QACAA,AAAgB,gBAAhBA,OAAO,IAAI,IAAoBA,AAA0B,gBAA1BA,OAAO,cAAc,GAClD,QACAA,AAAgB,kBAAhBA,OAAO,IAAI,IACTA,AAA0B,kBAA1BA,OAAO,cAAc,IACrBA,AAAgB,kBAAhBA,OAAO,IAAI,IACXA,AAA0B,kBAA1BA,OAAO,cAAc,GACrB,YACAM;QACV,MAAMC,eAAeF,gBAAgBG,OAAO,IAAI,CAACL,aAAa,EAAE;QAChE,MAAMM,oBACJJ,iBACAE,AAAwB,MAAxBA,aAAa,MAAM,IACnBA,YAAY,CAAC,EAAE,KAAKF,iBACpB,AAAoC,YAApC,OAAOF,SAAS,CAACE,cAAc;QAEjC,MAAMK,WAAiCD,oBACnC;YAAE,CAACP,QAAQ,EAAEC,SAAS,CAACE,cAAwB;QAAC,IAChD;YAAE,CAACH,QAAQ,EAAE;YAAI,GAAGC,SAAS;QAAC;QAElCN,KAAK,IAAI,CAACa;IACZ;IAEA,OAAOb;AACT;AAGO,MAAMc,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;AAClB;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMM,MAAME,qBAAqB,CAACC,SAC1B,AAAkB,YAAlB,OAAOA,SAAsBA,SAASA,OAAO,MAAM;AAGrD,MAAMC,+BAA+B,CAC1CD;IAEA,IAAI,AAAkB,YAAlB,OAAOA,UAAuB,CAACA,OAAO,MAAM,EAC9C;IAEF,OAAO;QACL,QAAQA,OAAO,MAAM;QACrB,yBAAyB,CAAC,CAACA,OAAO,uBAAuB;IAC3D;AACF;AAEO,MAAME,iCAAiC,OAC5CC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAM7B,eAAe,MAAMgC,mBACzBD,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,uDAAuD,CAAC;oBAChH;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAK9B;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAO8B;AACT;AAEA,MAAMG,sBAAsB;AAG5B,MAAMC,wBAAwBd,EAAAA,MACrB,CAAC;IACN,QAAQI;IACR,YAAYJ,EAAE,OAAO,GAAG,QAAQ;IAChC,WAAWA,EAAAA,OACD,GACP,QAAQ,GACR,QAAQ,CAAC;IACZ,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,OAAO;KAAG,EAAE,QAAQ;AACpD,GACC,WAAW;AAMP,MAAMe,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;IAErC,IAAIC,cAAcD;IAClB,IAAIC,YAAY,IAAI,EAAE,aAAa,eACjCA,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAIA,YAAY,IAAI,EAAE,aAAa,aAAa;QAC9C,MAAMC,QAAQD,YAAY,IAAI,CAAC,KAAK;QAGpC,IAAIL,uBAAuBM,OACzB,OAAO;QAKT,IAAI,YAAYA,SAASA,MAAM,MAAM,EACnC,OAAO;IAEX;IAEA,OAAO;AACT;AAEA,MAAMC,yBAAyB,CAC7BC;IAEA,IAAIC,eAAeD,UAAU,MAAM;IACnC,IAAIE,MAAM,OAAO,CAACF,UAAU,MAAM,KAAKA,UAAU,MAAM,CAAC,MAAM,GAAG,GAAG;QAClE,MAAMG,aAAaH,UAAU,MAAM,CAAC,MAAM;QAC1CC,gBAAgB,CAAC,OAAO,EAAEE,WAAW,MAAM,EAAEA,aAAa,IAAI,MAAM,GAAG,CAAC,CAAC;IAC3E;IACA,OAAOF;AACT;AAEO,MAAMG,2BAA2B,CAACR;IACvCS,OACEV,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOG,uBAAuBH,MAAM,MAAM;IAE9C;IAGA,OAAOU,OAAOV;AAChB;AAEO,MAAMW,8BAA8B,CACzCC,SACAC;IAEA,IAAI,CAACD,SACH,OAAO,EAAE;IAIX,MAAME,YAAYF;IAClB,IAAIE,UAAU,IAAI,EAAE,aAAa,eAAeA,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOpC,OAAO,IAAI,CAACmC,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACC;YAClB,MAAMhB,QAAQc,UAAU,KAAK,CAACE,IAAI;YAClC,IAAI,CAACjB,uBAAuBC,QAC1B,OAAO;YAIT,IAAIa,cACF,OAAOb,MAAM,IAAI,EAAE,aAAa;YAGlC,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAMzB,kBAAkB,CAC7B0C,YACAC;IAGA,IAAI,CAACC,cAAcF,aACjB,OAAO,CAAC;IAGV,MAAMG,gBAAgBT,4BAA4BO;IAClD,MAAMG,SAAS;QAAE,GAAGJ,UAAU;IAAC;IAE/B,KAAK,MAAMK,aAAaF,cAAe;QACrC,MAAMG,aAAaF,MAAM,CAACC,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACTF,MAAM,CAACC,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1BF,MAAM,CAACC,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxBF,MAAM,CAACC,UAAU,GAAGnB,uBAAuBoB,WAAW,MAAM;gBAC9D;YACF;QACF;IAEJ;IAEA,OAAOF;AACT;AAaO,MAAMG,mBAAmB,CAC9BC,UACAP,WACAQ;IAGA,IAAI,CAACR,WACH;IAIF,MAAMS,QAAQF,YAAY,CAAC;IAG3B,MAAMG,eAAejB,4BAA4BO;IAGjD,IAAIU,AAAwB,MAAxBA,aAAa,MAAM,EACrB,OAAOV,UAAU,KAAK,CAACS;IAIzB,MAAME,oBAAyC,CAAC;IAChD,KAAK,MAAMP,aAAaM,aACtB,IAAIN,aAAaK,OACfE,iBAAiB,CAACP,UAAU,GAAGK,KAAK,CAACL,UAAU;IAKnD,MAAMQ,sBAA2C,CAAC;IAClD,IAAK,MAAMd,OAAOW,MAChB,IAAIC,aAAa,QAAQ,CAACZ,MAExBc,mBAAmB,CAACd,IAAI,GAAG;QAAE,QAAQ;IAAU;SAE/Cc,mBAAmB,CAACd,IAAI,GAAGW,KAAK,CAACX,IAAI;IAKzC,MAAMe,YAAYb,UAAU,KAAK,CAACY;IAIlC,MAAME,QAAQN,SAAS;IACvB,IAAK,MAAMJ,aAAaO,kBAAmB;QACzC,IAAII,QAAQJ,iBAAiB,CAACP,UAAU;QACxC,IACEU,AAAUvD,WAAVuD,SACAA,AAAU,MAAVA,SACAC,SACA,AAAiB,YAAjB,OAAOA,SACPA,MAAM,MAAM,IACZA,MAAM,IAAI,EAEVA,QAAQ;YACN,GAAGA,KAAK;YACR,QAAQ;gBACN5F,KAAK,KAAK,CAAC4F,MAAM,MAAM,CAAC,EAAE,GAAGD;gBAC7B3F,KAAK,KAAK,CAAC4F,MAAM,MAAM,CAAC,EAAE,GAAGD;aAC9B;YACD,MAAM;gBACJ,GAAGC,MAAM,IAAI;gBACb,MAAM5F,KAAK,KAAK,CAAC4F,MAAM,IAAI,CAAC,IAAI,GAAGD;gBACnC,KAAK3F,KAAK,KAAK,CAAC4F,MAAM,IAAI,CAAC,GAAG,GAAGD;gBACjC,OAAO3F,KAAK,KAAK,CAAC4F,MAAM,IAAI,CAAC,KAAK,GAAGD;gBACrC,QAAQ3F,KAAK,KAAK,CAAC4F,MAAM,IAAI,CAAC,MAAM,GAAGD;YACzC;QACF;QAEFD,SAAS,CAACT,UAAU,GAAGW;IACzB;IAEA,OAAOF;AACT;AAEO,MAAMG,qBAAqB;AAQ3B,MAAMC,wBAAwB,CACnCC,SAAS,qBAAqB,EAC9BC;IAEA,MAAMC,MAAMD,AAAc5D,WAAd4D,YAA0B,IAAIE,KAAKF,aAAa,IAAIE;IAChE,MAAMC,OAAOF,IAAI,WAAW;IAC5B,MAAMG,QAAQ/B,OAAO4B,IAAI,QAAQ,KAAK,GAAG,QAAQ,CAAC,GAAG;IACrD,MAAMI,MAAMhC,OAAO4B,IAAI,OAAO,IAAI,QAAQ,CAAC,GAAG;IAC9C,MAAMK,QAAQjC,OAAO4B,IAAI,QAAQ,IAAI,QAAQ,CAAC,GAAG;IACjD,MAAMM,UAAUlC,OAAO4B,IAAI,UAAU,IAAI,QAAQ,CAAC,GAAG;IACrD,MAAMO,UAAUnC,OAAO4B,IAAI,UAAU,IAAI,QAAQ,CAAC,GAAG;IAErD,MAAMQ,aAAaV,OAChB,OAAO,CAAC,QAAQ1B,OAAO8B,OACvB,OAAO,CAAC,MAAMC,OACd,OAAO,CAAC,MAAMC,KACd,OAAO,CAAC,MAAMC,OACd,OAAO,CAAC,MAAMC,SACd,OAAO,CAAC,MAAMC;IAEjB,OAAO,GAAGC,WAAW,EAAE,EAAEV,OAAO,CAAC,CAAC;AACpC"}