npm - @midscene/core - Versions diffs - 1.0.1-beta-20251202112442.0 → 1.0.1-beta-20251203073716.0 - Mend

@midscene/core 1.0.1-beta-20251202112442.0 → 1.0.1-beta-20251203073716.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (132) hide show

package/dist/es/agent/agent.mjs +33 -40
package/dist/es/agent/agent.mjs.map +1 -1
package/dist/es/agent/execution-session.mjs.map +1 -1
package/dist/es/agent/task-builder.mjs +25 -25
package/dist/es/agent/task-builder.mjs.map +1 -1
package/dist/es/agent/task-cache.mjs +3 -3
package/dist/es/agent/task-cache.mjs.map +1 -1
package/dist/es/agent/tasks.mjs +13 -14
package/dist/es/agent/tasks.mjs.map +1 -1
package/dist/es/agent/ui-utils.mjs +9 -21
package/dist/es/agent/ui-utils.mjs.map +1 -1
package/dist/es/agent/utils.mjs +5 -8
package/dist/es/agent/utils.mjs.map +1 -1
package/dist/es/ai-model/conversation-history.mjs +1 -2
package/dist/es/ai-model/conversation-history.mjs.map +1 -1
package/dist/es/ai-model/inspect.mjs +6 -9
package/dist/es/ai-model/inspect.mjs.map +1 -1
package/dist/es/ai-model/llm-planning.mjs.map +1 -1
package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
package/dist/es/ai-model/prompt/common.mjs.map +1 -1
package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
package/dist/es/ai-model/prompt/llm-planning.mjs +6 -12
package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -1
package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
package/dist/es/ai-model/prompt/util.mjs.map +1 -1
package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
package/dist/es/ai-model/service-caller/index.mjs +11 -14
package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
package/dist/es/ai-model/ui-tars-planning.mjs +68 -7
package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
package/dist/es/common.mjs +6 -11
package/dist/es/common.mjs.map +1 -1
package/dist/es/device/index.mjs.map +1 -1
package/dist/es/index.mjs.map +1 -1
package/dist/es/report.mjs.map +1 -1
package/dist/es/service/index.mjs +6 -8
package/dist/es/service/index.mjs.map +1 -1
package/dist/es/service/utils.mjs.map +1 -1
package/dist/es/task-runner.mjs +17 -21
package/dist/es/task-runner.mjs.map +1 -1
package/dist/es/tree.mjs.map +1 -1
package/dist/es/types.mjs.map +1 -1
package/dist/es/utils.mjs +6 -7
package/dist/es/utils.mjs.map +1 -1
package/dist/es/yaml/builder.mjs.map +1 -1
package/dist/es/yaml/player.mjs +11 -16
package/dist/es/yaml/player.mjs.map +1 -1
package/dist/es/yaml/utils.mjs +1 -1
package/dist/es/yaml/utils.mjs.map +1 -1
package/dist/lib/agent/agent.js +33 -40
package/dist/lib/agent/agent.js.map +1 -1
package/dist/lib/agent/execution-session.js.map +1 -1
package/dist/lib/agent/index.js +10 -10
package/dist/lib/agent/index.js.map +1 -1
package/dist/lib/agent/task-builder.js +25 -25
package/dist/lib/agent/task-builder.js.map +1 -1
package/dist/lib/agent/task-cache.js +5 -5
package/dist/lib/agent/task-cache.js.map +1 -1
package/dist/lib/agent/tasks.js +14 -15
package/dist/lib/agent/tasks.js.map +1 -1
package/dist/lib/agent/ui-utils.js +9 -21
package/dist/lib/agent/ui-utils.js.map +1 -1
package/dist/lib/agent/utils.js +12 -15
package/dist/lib/agent/utils.js.map +1 -1
package/dist/lib/ai-model/conversation-history.js +1 -2
package/dist/lib/ai-model/conversation-history.js.map +1 -1
package/dist/lib/ai-model/index.js +22 -22
package/dist/lib/ai-model/index.js.map +1 -1
package/dist/lib/ai-model/inspect.js +9 -12
package/dist/lib/ai-model/inspect.js.map +1 -1
package/dist/lib/ai-model/llm-planning.js.map +1 -1
package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
package/dist/lib/ai-model/prompt/common.js.map +1 -1
package/dist/lib/ai-model/prompt/describe.js.map +1 -1
package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
package/dist/lib/ai-model/prompt/llm-locator.js +2 -2
package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
package/dist/lib/ai-model/prompt/llm-planning.js +6 -12
package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -1
package/dist/lib/ai-model/prompt/playwright-generator.js +9 -9
package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
package/dist/lib/ai-model/prompt/util.js.map +1 -1
package/dist/lib/ai-model/prompt/yaml-generator.js +16 -16
package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
package/dist/lib/ai-model/service-caller/index.js +14 -17
package/dist/lib/ai-model/service-caller/index.js.map +1 -1
package/dist/lib/ai-model/ui-tars-planning.js +68 -7
package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
package/dist/lib/common.js +24 -29
package/dist/lib/common.js.map +1 -1
package/dist/lib/device/device-options.js.map +1 -1
package/dist/lib/device/index.js +19 -19
package/dist/lib/device/index.js.map +1 -1
package/dist/lib/image/index.js +3 -3
package/dist/lib/image/index.js.map +1 -1
package/dist/lib/index.js +13 -13
package/dist/lib/index.js.map +1 -1
package/dist/lib/report.js.map +1 -1
package/dist/lib/service/index.js +6 -8
package/dist/lib/service/index.js.map +1 -1
package/dist/lib/service/utils.js.map +1 -1
package/dist/lib/task-runner.js +17 -21
package/dist/lib/task-runner.js.map +1 -1
package/dist/lib/tree.js +2 -2
package/dist/lib/tree.js.map +1 -1
package/dist/lib/types.js +7 -9
package/dist/lib/types.js.map +1 -1
package/dist/lib/utils.js +14 -15
package/dist/lib/utils.js.map +1 -1
package/dist/lib/yaml/builder.js.map +1 -1
package/dist/lib/yaml/index.js +12 -18
package/dist/lib/yaml/index.js.map +1 -1
package/dist/lib/yaml/player.js +11 -16
package/dist/lib/yaml/player.js.map +1 -1
package/dist/lib/yaml/utils.js +4 -4
package/dist/lib/yaml/utils.js.map +1 -1
package/dist/lib/yaml.js.map +1 -1
package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
package/dist/types/yaml.d.ts +44 -1
package/package.json +3 -3

package/dist/es/ai-model/llm-planning.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/llm-planning.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n ThinkingStrategy,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIActionType,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n thinkingStrategy: ThinkingStrategy;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n thinkingStrategy: opts.thinkingStrategy,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot();\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","screenshotBase64","size","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","callAIWithObjectResponse","AIActionType","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","undefined","fillBboxParam","console"],"mappings":";;;;;;AAuBA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IASC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;QAC7B,kBAAkBA,KAAK,gBAAgB;IACzC;IAEA,IAAIS,eAAeL;IACnB,IAAIM,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAC7B,MAAMO,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,yBAAyB,EAAEhB,KAAK,aAAa,CAAC,8CAA8C,EAAED,gBAAgB,mBAAmB,CAAC;gBAC3I;aACD;QACH;KACD;IAED,IAAIkB;IAEJ,IAAId,oBAAoB,sBAAsB,EAAE;QAC9Cc,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGd,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEc,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKR;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACc;IAC3B,MAAMC,aAAaf,oBAAoB,QAAQ;IAE/C,MAAMgB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASZ;QAAa;WACrCS;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACN,GAAG,MAAMC,yBACRJ,MACAK,aAAa,IAAI,EACjBtB;IAGF,MAAMuB,UAAUL,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,MAAMM,cAAkC;QACtC,GAAGN,UAAU;QACbK;QACAJ;QACAC;QACA,UAAUK,uBACRF,SACAzB,KAAK,WAAW,EAChBoB,WAAW,KAAK;IAEpB;IAEAQ,OAAOR,YAAY;IAEnBK,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsB/B,KAAK,WAAW,CAAC,IAAI,CAC/C,CAAC6B,SAAWA,OAAO,IAAI,KAAKC;QAG9BlC,MAAM,+BAA+BmC;QACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENnC,MAAM,gBAAgBoC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgB7B,AAAW8B,WAAX9B,QAElBuB,OAAO,KAAK,CAACK,MAAM,GAAGG,cACpBF,cACAzB,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEAsB,OAAO,CAACR,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IAEvE,IACEK,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACAvC;IAIJI,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMkB;YACR;SACD;IACH;IAEA,OAAOK;AACT"}
1	+ {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n ThinkingStrategy,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIActionType,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n thinkingStrategy: ThinkingStrategy;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n thinkingStrategy: opts.thinkingStrategy,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot();\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","screenshotBase64","size","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","callAIWithObjectResponse","AIActionType","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","undefined","fillBboxParam","console"],"mappings":";;;;;;AAuBA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IASC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;QAC7B,kBAAkBA,KAAK,gBAAgB;IACzC;IAEA,IAAIS,eAAeL;IACnB,IAAIM,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAC7B,MAAMO,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,yBAAyB,EAAEhB,KAAK,aAAa,CAAC,8CAA8C,EAAED,gBAAgB,mBAAmB,CAAC;gBAC3I;aACD;QACH;KACD;IAED,IAAIkB;IAEJ,IAAId,oBAAoB,sBAAsB,EAAE;QAC9Cc,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGd,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEc,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKR;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACc;IAC3B,MAAMC,aAAaf,oBAAoB,QAAQ;IAE/C,MAAMgB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASZ;QAAa;WACrCS;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACN,GAAG,MAAMC,yBACRJ,MACAK,aAAa,IAAI,EACjBtB;IAGF,MAAMuB,UAAUL,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,MAAMM,cAAkC;QACtC,GAAGN,UAAU;QACbK;QACAJ;QACAC;QACA,UAAUK,uBACRF,SACAzB,KAAK,WAAW,EAChBoB,WAAW,KAAK;IAEpB;IAEAQ,OAAOR,YAAY;IAEnBK,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsB/B,KAAK,WAAW,CAAC,IAAI,CAC/C,CAAC6B,SAAWA,OAAO,IAAI,KAAKC;QAG9BlC,MAAM,+BAA+BmC;QACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENnC,MAAM,gBAAgBoC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgB7B,AAAW8B,WAAX9B,QAElBuB,OAAO,KAAK,CAACK,MAAM,GAAGG,cACpBF,cACAzB,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEAsB,OAAO,CAACR,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IAEvE,IACEK,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACAvC;IAIJI,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMkB;YACR;SACD;IACH;IAEA,OAAOK;AACT"}

package/dist/es/ai-model/prompt/assertion.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/assertion.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/assertion.ts"],"sourcesContent":["import type { ResponseFormatJSONSchema } from 'openai/resources/index';\n\nexport const assertSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'assert',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n pass: {\n type: 'boolean',\n description: 'Whether the assertion passed or failed',\n },\n thought: {\n type: ['string', 'null'],\n description: 'The thought process behind the assertion',\n },\n },\n required: ['pass', 'thought'],\n additionalProperties: false,\n },\n },\n};\n"],"names":["assertSchema"],"mappings":"AAEO,MAAMA,eAAyC;IACpD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,MAAM;oBACJ,MAAM;oBACN,aAAa;gBACf;gBACA,SAAS;oBACP,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAQ;aAAU;YAC7B,sBAAsB;QACxB;IACF;AACF"}
1	+ {"version":3,"file":"ai-model/prompt/assertion.mjs","sources":["../../../../src/ai-model/prompt/assertion.ts"],"sourcesContent":["import type { ResponseFormatJSONSchema } from 'openai/resources/index';\n\nexport const assertSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'assert',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n pass: {\n type: 'boolean',\n description: 'Whether the assertion passed or failed',\n },\n thought: {\n type: ['string', 'null'],\n description: 'The thought process behind the assertion',\n },\n },\n required: ['pass', 'thought'],\n additionalProperties: false,\n },\n },\n};\n"],"names":["assertSchema"],"mappings":"AAEO,MAAMA,eAAyC;IACpD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,MAAM;oBACJ,MAAM;oBACN,aAAa;gBACf;gBACA,SAAS;oBACP,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAQ;aAAU;YAC7B,sBAAsB;QACxB;IACF;AACF"}

package/dist/es/ai-model/prompt/common.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/common.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/common.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nexport function bboxDescription(vlMode: TVlModeTypes \| undefined) {\n if (vlMode === 'gemini') {\n return 'box_2d bounding box for the target element, should be [ymin, xmin, ymax, xmax] normalized to 0-1000.';\n }\n return '2d bounding box as [xmin, ymin, xmax, ymax]';\n}\n"],"names":["bboxDescription","vlMode"],"mappings":"AACO,SAASA,gBAAgBC,MAAgC;IAC9D,IAAIA,AAAW,aAAXA,QACF,OAAO;IAET,OAAO;AACT"}
1	+ {"version":3,"file":"ai-model/prompt/common.mjs","sources":["../../../../src/ai-model/prompt/common.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nexport function bboxDescription(vlMode: TVlModeTypes \| undefined) {\n if (vlMode === 'gemini') {\n return 'box_2d bounding box for the target element, should be [ymin, xmin, ymax, xmax] normalized to 0-1000.';\n }\n return '2d bounding box as [xmin, ymin, xmax, ymax]';\n}\n"],"names":["bboxDescription","vlMode"],"mappings":"AACO,SAASA,gBAAgBC,MAAgC;IAC9D,IAAIA,AAAW,aAAXA,QACF,OAAO;IAET,OAAO;AACT"}

package/dist/es/ai-model/prompt/describe.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/describe.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/describe.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const elementDescriberInstruction = () => {\n return `\nDescribe the element in the red rectangle for precise identification. Use ${getPreferredLanguage()}.\n\nCRITICAL REQUIREMENTS:\n1. UNIQUENESS: The description must uniquely identify this element on the current page\n2. UNIVERSALITY: Use generic, reusable selectors that work across different contexts\n3. PRECISION: Be specific enough to distinguish from similar elements\n\nDESCRIPTION STRUCTURE:\n1. Element type (button, input, link, div, etc.)\n2. Primary identifier (in order of preference):\n - Unique text content: \"with text 'Login'\"\n - Unique attribute: \"with aria-label 'Search'\"\n - Unique class/ID: \"with class 'primary-button'\"\n - Unique position: \"in header navigation\"\n3. Secondary identifiers (if needed for uniqueness):\n - Visual features: \"blue background\", \"with icon\"\n - Relative position: \"below search bar\", \"in sidebar\"\n - Parent context: \"in login form\", \"in main menu\"\n\nGUIDELINES:\n- Keep description under 25 words\n- Prioritize semantic identifiers over visual ones\n- Use consistent terminology across similar elements\n- Avoid page-specific or temporary content\n- Don't mention the red rectangle or selection box\n- Focus on stable, reusable characteristics\n\nEXAMPLES:\n- \"Login button with text 'Sign In'\"\n- \"Search input with placeholder 'Enter keywords'\"\n- \"Navigation link with text 'Home' in header\"\n- \"Submit button in contact form\"\n- \"Menu icon with aria-label 'Open menu'\"\n\nReturn JSON:\n{\n \"description\": \"unique element identifier\",\n \"error\"?: \"error message if any\"\n}`;\n};\n"],"names":["elementDescriberInstruction","getPreferredLanguage"],"mappings":";AAEO,MAAMA,8BAA8B,IAClC,CAAC;0EACgE,EAAEC,uBAAuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsClG,CAAC"}
1	+ {"version":3,"file":"ai-model/prompt/describe.mjs","sources":["../../../../src/ai-model/prompt/describe.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const elementDescriberInstruction = () => {\n return `\nDescribe the element in the red rectangle for precise identification. Use ${getPreferredLanguage()}.\n\nCRITICAL REQUIREMENTS:\n1. UNIQUENESS: The description must uniquely identify this element on the current page\n2. UNIVERSALITY: Use generic, reusable selectors that work across different contexts\n3. PRECISION: Be specific enough to distinguish from similar elements\n\nDESCRIPTION STRUCTURE:\n1. Element type (button, input, link, div, etc.)\n2. Primary identifier (in order of preference):\n - Unique text content: \"with text 'Login'\"\n - Unique attribute: \"with aria-label 'Search'\"\n - Unique class/ID: \"with class 'primary-button'\"\n - Unique position: \"in header navigation\"\n3. Secondary identifiers (if needed for uniqueness):\n - Visual features: \"blue background\", \"with icon\"\n - Relative position: \"below search bar\", \"in sidebar\"\n - Parent context: \"in login form\", \"in main menu\"\n\nGUIDELINES:\n- Keep description under 25 words\n- Prioritize semantic identifiers over visual ones\n- Use consistent terminology across similar elements\n- Avoid page-specific or temporary content\n- Don't mention the red rectangle or selection box\n- Focus on stable, reusable characteristics\n\nEXAMPLES:\n- \"Login button with text 'Sign In'\"\n- \"Search input with placeholder 'Enter keywords'\"\n- \"Navigation link with text 'Home' in header\"\n- \"Submit button in contact form\"\n- \"Menu icon with aria-label 'Open menu'\"\n\nReturn JSON:\n{\n \"description\": \"unique element identifier\",\n \"error\"?: \"error message if any\"\n}`;\n};\n"],"names":["elementDescriberInstruction","getPreferredLanguage"],"mappings":";AAEO,MAAMA,8BAA8B,IAClC,CAAC;0EACgE,EAAEC,uBAAuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsClG,CAAC"}

package/dist/es/ai-model/prompt/extraction.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/extraction.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/extraction.ts"],"sourcesContent":["import type { ResponseFormatJSONSchema } from 'openai/resources/index';\n\nexport function systemPromptToExtract() {\n return `\nYou are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.\n\nThe user will give you a screenshot, the contents of it (optional), and some data requirements in <DATA_DEMAND>. You need to understand the user's requirements and extract the data satisfying the <DATA_DEMAND>.\n\nIf a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.\n\nIf the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.\n\n\nReturn in the following JSON format:\n{\n thought: string, // the thinking process of the extraction, less then 300 words\n data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.\n errors: [], // string[], error message if any\n}\n\n# Example 1\nFor example, if the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"name\": \"name shows on the left panel, string\",\n \"age\": \"age shows on the right panel, number\",\n \"isAdmin\": \"if the user is admin, boolean\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: {\n name: \"John\",\n age: 30,\n isAdmin: true\n },\n}\n\n# Example 2\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe todo items list, string[]\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: [\"todo 1\", \"todo 2\", \"todo 3\"],\n}\n\n# Example 3\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe page title, string\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: \"todo list\",\n}\n\n# Example 4\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"result\": \"Boolean, is it currently the SMS page?\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: { result: true },\n}\n`;\n}\n\nexport const extractDataQueryPrompt = (\n pageDescription: string,\n dataQuery: string \| Record<string, string>,\n) => {\n let dataQueryText = '';\n if (typeof dataQuery === 'string') {\n dataQueryText = dataQuery;\n } else {\n dataQueryText = JSON.stringify(dataQuery, null, 2);\n }\n\n return `\n<PageDescription>\n${pageDescription}\n</PageDescription>\n\n<DATA_DEMAND>\n${dataQueryText}\n</DATA_DEMAND>\n `;\n};\n\nexport const extractDataSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'extract_data',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n data: {\n type: 'object',\n description: 'The extracted data',\n },\n errors: {\n type: 'array',\n items: {\n type: 'string',\n },\n description: 'Error messages, if any',\n },\n },\n required: ['data', 'errors'],\n additionalProperties: false,\n },\n },\n};\n"],"names":["systemPromptToExtract","extractDataQueryPrompt","pageDescription","dataQuery","dataQueryText","JSON","extractDataSchema"],"mappings":"AAEO,SAASA;IACd,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkFV,CAAC;AACD;AAEO,MAAMC,yBAAyB,CACpCC,iBACAC;IAEA,IAAIC,gBAAgB;IAElBA,gBADE,AAAqB,YAArB,OAAOD,YACOA,YAEAE,KAAK,SAAS,CAACF,WAAW,MAAM;IAGlD,OAAO,CAAC;;AAEV,EAAED,gBAAgB;;;;AAIlB,EAAEE,cAAc;;EAEd,CAAC;AACH;AAEO,MAAME,oBAA8C;IACzD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,MAAM;oBACJ,MAAM;oBACN,aAAa;gBACf;gBACA,QAAQ;oBACN,MAAM;oBACN,OAAO;wBACL,MAAM;oBACR;oBACA,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAQ;aAAS;YAC5B,sBAAsB;QACxB;IACF;AACF"}
1	+ {"version":3,"file":"ai-model/prompt/extraction.mjs","sources":["../../../../src/ai-model/prompt/extraction.ts"],"sourcesContent":["import type { ResponseFormatJSONSchema } from 'openai/resources/index';\n\nexport function systemPromptToExtract() {\n return `\nYou are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.\n\nThe user will give you a screenshot, the contents of it (optional), and some data requirements in <DATA_DEMAND>. You need to understand the user's requirements and extract the data satisfying the <DATA_DEMAND>.\n\nIf a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.\n\nIf the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.\n\n\nReturn in the following JSON format:\n{\n thought: string, // the thinking process of the extraction, less then 300 words\n data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.\n errors: [], // string[], error message if any\n}\n\n# Example 1\nFor example, if the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"name\": \"name shows on the left panel, string\",\n \"age\": \"age shows on the right panel, number\",\n \"isAdmin\": \"if the user is admin, boolean\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: {\n name: \"John\",\n age: 30,\n isAdmin: true\n },\n}\n\n# Example 2\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe todo items list, string[]\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: [\"todo 1\", \"todo 2\", \"todo 3\"],\n}\n\n# Example 3\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe page title, string\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: \"todo list\",\n}\n\n# Example 4\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"result\": \"Boolean, is it currently the SMS page?\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: { result: true },\n}\n`;\n}\n\nexport const extractDataQueryPrompt = (\n pageDescription: string,\n dataQuery: string \| Record<string, string>,\n) => {\n let dataQueryText = '';\n if (typeof dataQuery === 'string') {\n dataQueryText = dataQuery;\n } else {\n dataQueryText = JSON.stringify(dataQuery, null, 2);\n }\n\n return `\n<PageDescription>\n${pageDescription}\n</PageDescription>\n\n<DATA_DEMAND>\n${dataQueryText}\n</DATA_DEMAND>\n `;\n};\n\nexport const extractDataSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'extract_data',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n data: {\n type: 'object',\n description: 'The extracted data',\n },\n errors: {\n type: 'array',\n items: {\n type: 'string',\n },\n description: 'Error messages, if any',\n },\n },\n required: ['data', 'errors'],\n additionalProperties: false,\n },\n },\n};\n"],"names":["systemPromptToExtract","extractDataQueryPrompt","pageDescription","dataQuery","dataQueryText","JSON","extractDataSchema"],"mappings":"AAEO,SAASA;IACd,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkFV,CAAC;AACD;AAEO,MAAMC,yBAAyB,CACpCC,iBACAC;IAEA,IAAIC,gBAAgB;IAElBA,gBADE,AAAqB,YAArB,OAAOD,YACOA,YAEAE,KAAK,SAAS,CAACF,WAAW,MAAM;IAGlD,OAAO,CAAC;;AAEV,EAAED,gBAAgB;;;;AAIlB,EAAEE,cAAc;;EAEd,CAAC;AACH;AAEO,MAAME,oBAA8C;IACzD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,MAAM;oBACJ,MAAM;oBACN,aAAa;gBACf;gBACA,QAAQ;oBACN,MAAM;oBACN,OAAO;wBACL,MAAM;oBACR;oBACA,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAQ;aAAS;YAC5B,sBAAsB;QACxB;IACF;AACF"}

package/dist/es/ai-model/prompt/llm-locator.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes \| undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = (targetElementDescription: string) =>\n `Find: ${targetElementDescription}`;\n"],"names":["systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","findElementPrompt","targetElementDescription"],"mappings":";AAEO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,cAAcC,gBAAgBF;IACpC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;AAwB9D,CAAC;AACD;AAEO,MAAME,oBAAoB,CAACC,2BAChC,CAAC,MAAM,EAAEA,0BAA0B"}
1	+ {"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["../../../../src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes \| undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = (targetElementDescription: string) =>\n `Find: ${targetElementDescription}`;\n"],"names":["systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","findElementPrompt","targetElementDescription"],"mappings":";AAEO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,cAAcC,gBAAgBF;IACpC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;AAwB9D,CAAC;AACD;AAEO,MAAME,oBAAoB,CAACC,2BAChC,CAAC,MAAM,EAAEA,0BAA0B"}

package/dist/es/ai-model/prompt/llm-planning.mjs CHANGED Viewed

@@ -11,14 +11,12 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
     const fields = [];
     fields.push(`- type: "${action.name}"`);
     if (action.paramSchema) {
-        var _schema__def;
         const paramLines = [];
         const schema = action.paramSchema;
-        const isZodObject = (null == (_schema__def = schema._def) ? void 0 : _schema__def.typeName) === 'ZodObject';
+        const isZodObject = schema._def?.typeName === 'ZodObject';
         if (isZodObject && schema.shape) {
             const shape = schema.shape;
             const getTypeName = (field)=>{
-                var _actualField__def;
                 const unwrapField = (f)=>{
                     if (!f._def) return f;
                     const typeName = f._def.typeName;
@@ -29,7 +27,7 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
                     return f;
                 };
                 const actualField = unwrapField(field);
-                const fieldTypeName = null == (_actualField__def = actualField._def) ? void 0 : _actualField__def.typeName;
+                const fieldTypeName = actualField._def?.typeName;
                 if ('ZodString' === fieldTypeName) return 'string';
                 if ('ZodNumber' === fieldTypeName) return 'number';
                 if ('ZodBoolean' === fieldTypeName) return 'boolean';
@@ -39,13 +37,11 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
                     return 'object';
                 }
                 if ('ZodEnum' === fieldTypeName) {
-                    var _actualField__def_values, _actualField__def1;
-                    const values = (null == (_actualField__def1 = actualField._def) ? void 0 : null == (_actualField__def_values = _actualField__def1.values) ? void 0 : _actualField__def_values.map((option)=>String(`'${option}'`)).join(', ')) ?? 'enum';
+                    const values = actualField._def?.values?.map((option)=>String(`'${option}'`)).join(', ') ?? 'enum';
                     return `enum(${values})`;
                 }
                 if ('ZodUnion' === fieldTypeName) {
-                    var _actualField__def2;
-                    const options = null == (_actualField__def2 = actualField._def) ? void 0 : _actualField__def2.options;
+                    const options = actualField._def?.options;
                     if (options && options.length > 0) {
                         const types = options.map((opt)=>getTypeName(opt));
                         return types.join(' | ');
@@ -56,7 +52,6 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
                 return actualField.toString();
             };
             const getDescription = (field)=>{
-                var _actualField__def;
                 const unwrapField = (f)=>{
                     if (!f._def) return f;
                     const typeName = f._def.typeName;
@@ -69,7 +64,7 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
                 if ("description" in field) return field.description || null;
                 const actualField = unwrapField(field);
                 if ("description" in actualField) return actualField.description || null;
-                if ((null == (_actualField__def = actualField._def) ? void 0 : _actualField__def.typeName) === 'ZodObject') {
+                if (actualField._def?.typeName === 'ZodObject') {
                     if ('midscene_location_field_flag' in actualField._def.shape()) return 'Location information for the target element';
                 }
                 return null;
@@ -90,8 +85,7 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
                 });
             }
         } else {
-            var _schema__def1;
-            const schemaTypeName = null == (_schema__def1 = schema._def) ? void 0 : _schema__def1.typeName;
+            const schemaTypeName = schema._def?.typeName;
             let typeName = 'unknown';
             if ('ZodString' === schemaTypeName) typeName = 'string';
             else if ('ZodNumber' === schemaTypeName) typeName = 'number';

package/dist/es/ai-model/prompt/llm-planning.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction, ThinkingStrategy } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { ifMidsceneLocatorField } from '../../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes \| undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as any;\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] \| undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] \| undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' \| ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string \| null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description \|\| null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description \|\| null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n // For simple primitive types, the param should be passed directly as the value\n const schemaTypeName = schema._def?.typeName;\n let typeName = 'unknown';\n\n if (schemaTypeName === 'ZodString') typeName = 'string';\n else if (schemaTypeName === 'ZodNumber') typeName = 'number';\n else if (schemaTypeName === 'ZodBoolean') typeName = 'boolean';\n\n // Get description if available\n const description = 'description' in schema ? schema.description : null;\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description \|\| 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n thinkingStrategy,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes \| undefined;\n includeBbox: boolean;\n thinkingStrategy: ThinkingStrategy;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n // Conditionally include log field based on thinkingStrategy\n const logFieldDefinition =\n thinkingStrategy === 'off'\n ? ''\n : '\"log\": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The log should contain the following information: \"The user wants to do ... . According to the instruction and the previous logs, next step is to .... Now i am going to compose an action \\'{ action-type }\\' to do this\". If no action should be done, log the reason. Use the same language as the user\\'s instruction.\\n ';\n\n const exampleLogField =\n thinkingStrategy === 'off'\n ? ''\n : \"\\\"log\\\": \\\"The user wants to do click 'Confirm' button, and click 'Yes' in popup. The current progress is ..., we still need to ... . Now i am going to compose an action '...' to click 'Yes' in popup.\\\",\\n \";\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\nRestriction:\n- Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If the user mentions something to assert and the condition is not met, you should think this is an error and set the \"error\" field to the error message.\n\nSupporting actions:\n${actionList}\n\nReturn in JSON format:\n{\n ${logFieldDefinition}${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } \| null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, when the instruction is \"click 'Confirm' button, and click 'Yes' in popup\" and the previous log shows \"The 'Confirm' button has been clicked\", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.\n\nthis and output the JSON:\n\n{\n ${exampleLogField}\"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": {\n ${vlMode ? `\"bbox\": [100, 100, 200, 200],` : ''}\n \"prompt\": \"The 'Yes' button in popup\"\n }\n }\n },\n \"more_actions_needed_by_instruction\": false,\n}\n`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n"],"names":["commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","_schema__def","paramLines","schema","isZodObject","shape","getTypeName","field","_actualField__def","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","_actualField__def_values","values","option","String","_actualField__def2","options","types","opt","console","getDescription","key","Object","isOptional","keyWithOptional","description","paramLine","line","_schema__def1","schemaTypeName","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","thinkingStrategy","Error","actionDescriptionList","undefined","actionList","logFieldDefinition","exampleLogField","planSchema"],"mappings":";;AASA,MAAMA,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;YAKFI;QAJpB,MAAMC,aAAuB,EAAE;QAG/B,MAAMC,SAASN,OAAO,WAAW;QACjC,MAAMO,cAAcH,AAAAA,SAAAA,CAAAA,eAAAA,OAAO,IAAI,AAAD,IAAVA,KAAAA,IAAAA,aAAa,QAAQ,AAAD,MAAM;QAE9C,IAAIG,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAG1B,MAAMG,cAAc,CAACC;oBA4BGC;gBA1BtB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAEA,MAAME,cAAcH,YAAYF;gBAChC,MAAMM,gBAAgB,QAAAL,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ;gBAEhD,IAAIK,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;gBAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;gBACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;oBAEjC,IAAIC,uBAAuBF,cACzB,OAAOd;oBAET,OAAO;gBACT;gBACA,IAAIe,AAAkB,cAAlBA,eAA6B;wBAE5BE,0BAAAA;oBADH,MAAMC,SACJ,AAAC,SAAAD,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,mBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBACG,GAAG,CAAC,CAACE,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,IAAI,CAAC,KAAI,KAAK;oBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;gBAC1B;gBAEA,IAAIH,AAAkB,eAAlBA,eAA8B;wBAChBM;oBAAhB,MAAMC,UAAU,QAAAD,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,OAAO;oBACzC,IAAIC,WAAWA,QAAQ,MAAM,GAAG,GAAG;wBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAahB,YAAYgB;wBACpD,OAAOD,MAAM,IAAI,CAAC;oBACpB;oBACA,OAAO;gBACT;gBAEAE,QAAQ,IAAI,CACV,2EACAX,YAAY,IAAI;gBAElB,OAAOA,YAAY,QAAQ;YAC7B;YAGA,MAAMY,iBAAiB,CAACjB;oBAwClBC;gBAtCJ,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAGA,IAAI,iBAAiBH,OACnB,OAAOA,MAAM,WAAW,IAAI;gBAG9B,MAAMK,cAAcH,YAAYF;gBAGhC,IAAI,iBAAiBK,aACnB,OAAOA,YAAY,WAAW,IAAI;gBAIpC,IAAIJ,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,aACjC;oBAAA,IAAI,kCAAkCI,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;gBACT;gBAGF,OAAO;YACT;YAEA,KAAK,MAAM,CAACa,KAAKlB,MAAM,IAAImB,OAAO,OAAO,CAACrB,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMoB,aACJ,AAAqC,cAArC,OAAQpB,MAAc,UAAU,IAC/BA,MAAc,UAAU;gBAC3B,MAAMqB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMd,WAAWL,YAAYC;gBAG7B,MAAMsB,cAAcL,eAAejB;gBAGnC,IAAIuB,YAAY,GAAGF,gBAAgB,EAAE,EAAEjB,UAAU;gBACjD,IAAIkB,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;gBAGnC3B,WAAW,IAAI,CAAC4B;YAClB;YAIF,IAAI5B,WAAW,MAAM,GAAG,GAAG;gBACzBF,OAAO,IAAI,CAAC;gBACZE,WAAW,OAAO,CAAC,CAAC6B;oBAClB/B,OAAO,IAAI,CAAC,CAAC,IAAI,EAAE+B,MAAM;gBAC3B;YACF;QACF,OAAO;gBAGkBC;YAAvB,MAAMC,iBAAiB,QAAAD,CAAAA,gBAAAA,OAAO,IAAI,AAAD,IAAVA,KAAAA,IAAAA,cAAa,QAAQ;YAC5C,IAAIrB,WAAW;YAEf,IAAIsB,AAAmB,gBAAnBA,gBAAgCtB,WAAW;iBAC1C,IAAIsB,AAAmB,gBAAnBA,gBAAgCtB,WAAW;iBAC/C,IAAIsB,AAAmB,iBAAnBA,gBAAiCtB,WAAW;YAGrD,MAAMkB,cAAc,iBAAiB1B,SAASA,OAAO,WAAW,GAAG;YAGnE,IAAI+B,mBAAmB,CAAC,SAAS,EAAEvB,UAAU;YAC7C,IAAIkB,aACFK,oBAAoB,CAAC,IAAI,EAAEL,aAAa;YAE1CK,oBAAoB;YAEpBlC,OAAO,IAAI,CAACkC;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAErC,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAeoC,2BAA2B,EAC/CC,WAAW,EACX1C,MAAM,EACN2C,WAAW,EACXC,gBAAgB,EAMjB;IAEC,IAAID,eAAe,CAAC3C,QAClB,MAAM,IAAI6C,MACR;IAIJ,MAAMC,wBAAwBJ,YAAY,GAAG,CAAC,CAACvC,SACtCD,qBACLC,QACAJ,cAAc4C,cAAc3C,SAAS+C;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAG9C,MAAMG,qBACJL,AAAqB,UAArBA,mBACI,KACA;IAEN,MAAMM,kBACJN,AAAqB,UAArBA,mBACI,KACA;IAEN,OAAO,CAAC;;;;;;;;;;;;;AAaV,EAAEI,WAAW;;;;EAIX,EAAEC,qBAAqBnD,mBAAmB;;;;;;;;;;;;;;EAc1C,EAAEoD,gBAAgB;;;;QAIZ,EAAElD,SAAS,kCAAkC,GAAG;;;;;;;AAOxD,CAAC;AACD;AAEO,MAAMmD,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF"}
1	+ {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction, ThinkingStrategy } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { ifMidsceneLocatorField } from '../../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes \| undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as any;\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] \| undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] \| undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' \| ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string \| null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description \|\| null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description \|\| null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n // For simple primitive types, the param should be passed directly as the value\n const schemaTypeName = schema._def?.typeName;\n let typeName = 'unknown';\n\n if (schemaTypeName === 'ZodString') typeName = 'string';\n else if (schemaTypeName === 'ZodNumber') typeName = 'number';\n else if (schemaTypeName === 'ZodBoolean') typeName = 'boolean';\n\n // Get description if available\n const description = 'description' in schema ? schema.description : null;\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description \|\| 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n thinkingStrategy,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes \| undefined;\n includeBbox: boolean;\n thinkingStrategy: ThinkingStrategy;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n // Conditionally include log field based on thinkingStrategy\n const logFieldDefinition =\n thinkingStrategy === 'off'\n ? ''\n : '\"log\": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The log should contain the following information: \"The user wants to do ... . According to the instruction and the previous logs, next step is to .... Now i am going to compose an action \\'{ action-type }\\' to do this\". If no action should be done, log the reason. Use the same language as the user\\'s instruction.\\n ';\n\n const exampleLogField =\n thinkingStrategy === 'off'\n ? ''\n : \"\\\"log\\\": \\\"The user wants to do click 'Confirm' button, and click 'Yes' in popup. The current progress is ..., we still need to ... . Now i am going to compose an action '...' to click 'Yes' in popup.\\\",\\n \";\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\nRestriction:\n- Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If the user mentions something to assert and the condition is not met, you should think this is an error and set the \"error\" field to the error message.\n\nSupporting actions:\n${actionList}\n\nReturn in JSON format:\n{\n ${logFieldDefinition}${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } \| null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, when the instruction is \"click 'Confirm' button, and click 'Yes' in popup\" and the previous log shows \"The 'Confirm' button has been clicked\", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.\n\nthis and output the JSON:\n\n{\n ${exampleLogField}\"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": {\n ${vlMode ? `\"bbox\": [100, 100, 200, 200],` : ''}\n \"prompt\": \"The 'Yes' button in popup\"\n }\n }\n },\n \"more_actions_needed_by_instruction\": false,\n}\n`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n"],"names":["commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","getTypeName","field","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","values","option","String","options","types","opt","console","getDescription","key","Object","isOptional","keyWithOptional","description","paramLine","line","schemaTypeName","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","thinkingStrategy","Error","actionDescriptionList","undefined","actionList","logFieldDefinition","exampleLogField","planSchema"],"mappings":";;AASA,MAAMA,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QACjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAG1B,MAAMG,cAAc,CAACC;gBAEnB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAEA,MAAME,cAAcH,YAAYD;gBAChC,MAAMK,gBAAgBD,YAAY,IAAI,EAAE;gBAExC,IAAIC,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;gBAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;gBACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;oBAEjC,IAAIC,uBAAuBF,cACzB,OAAOZ;oBAET,OAAO;gBACT;gBACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAC/B,MAAME,SACHH,YAAY,IAAI,EAAE,QACf,IAAI,CAACI,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,KAAK,SAAS;oBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;gBAC1B;gBAEA,IAAIF,AAAkB,eAAlBA,eAA8B;oBAChC,MAAMK,UAAUN,YAAY,IAAI,EAAE;oBAClC,IAAIM,WAAWA,QAAQ,MAAM,GAAG,GAAG;wBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAab,YAAYa;wBACpD,OAAOD,MAAM,IAAI,CAAC;oBACpB;oBACA,OAAO;gBACT;gBAEAE,QAAQ,IAAI,CACV,2EACAT,YAAY,IAAI;gBAElB,OAAOA,YAAY,QAAQ;YAC7B;YAGA,MAAMU,iBAAiB,CAACd;gBAEtB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAGA,IAAI,iBAAiBF,OACnB,OAAOA,MAAM,WAAW,IAAI;gBAG9B,MAAMI,cAAcH,YAAYD;gBAGhC,IAAI,iBAAiBI,aACnB,OAAOA,YAAY,WAAW,IAAI;gBAIpC,IAAIA,YAAY,IAAI,EAAE,aAAa,aACjC;oBAAA,IAAI,kCAAkCA,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;gBACT;gBAGF,OAAO;YACT;YAEA,KAAK,MAAM,CAACW,KAAKf,MAAM,IAAIgB,OAAO,OAAO,CAAClB,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMiB,aACJ,AAAqC,cAArC,OAAQjB,MAAc,UAAU,IAC/BA,MAAc,UAAU;gBAC3B,MAAMkB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMZ,WAAWJ,YAAYC;gBAG7B,MAAMmB,cAAcL,eAAed;gBAGnC,IAAIoB,YAAY,GAAGF,gBAAgB,EAAE,EAAEf,UAAU;gBACjD,IAAIgB,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;gBAGnCxB,WAAW,IAAI,CAACyB;YAClB;YAIF,IAAIzB,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAAC0B;oBAClB3B,OAAO,IAAI,CAAC,CAAC,IAAI,EAAE2B,MAAM;gBAC3B;YACF;QACF,OAAO;YAGL,MAAMC,iBAAiB1B,OAAO,IAAI,EAAE;YACpC,IAAIO,WAAW;YAEf,IAAImB,AAAmB,gBAAnBA,gBAAgCnB,WAAW;iBAC1C,IAAImB,AAAmB,gBAAnBA,gBAAgCnB,WAAW;iBAC/C,IAAImB,AAAmB,iBAAnBA,gBAAiCnB,WAAW;YAGrD,MAAMgB,cAAc,iBAAiBvB,SAASA,OAAO,WAAW,GAAG;YAGnE,IAAI2B,mBAAmB,CAAC,SAAS,EAAEpB,UAAU;YAC7C,IAAIgB,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpB7B,OAAO,IAAI,CAAC6B;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEhC,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe+B,2BAA2B,EAC/CC,WAAW,EACXrC,MAAM,EACNsC,WAAW,EACXC,gBAAgB,EAMjB;IAEC,IAAID,eAAe,CAACtC,QAClB,MAAM,IAAIwC,MACR;IAIJ,MAAMC,wBAAwBJ,YAAY,GAAG,CAAC,CAAClC,SACtCD,qBACLC,QACAJ,cAAcuC,cAActC,SAAS0C;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAG9C,MAAMG,qBACJL,AAAqB,UAArBA,mBACI,KACA;IAEN,MAAMM,kBACJN,AAAqB,UAArBA,mBACI,KACA;IAEN,OAAO,CAAC;;;;;;;;;;;;;AAaV,EAAEI,WAAW;;;;EAIX,EAAEC,qBAAqB9C,mBAAmB;;;;;;;;;;;;;;EAc1C,EAAE+C,gBAAgB;;;;QAIZ,EAAE7C,SAAS,kCAAkC,GAAG;;;;;;;AAOxD,CAAC;AACD;AAEO,MAAM8C,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF"}

package/dist/es/ai-model/prompt/llm-section-locator.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/llm-section-locator.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/llm-section-locator.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\n\nexport function systemPromptToLocateSection(vlMode: TVlModeTypes \| undefined) {\n const bboxFormat = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Find a section containing the target element\n- If the description mentions reference elements, also locate sections containing those references\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxFormat}\n \"references_bbox\"?: [\n [number, number, number, number],\n ...\n ],\n \"error\"?: string\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` - Bounding box of the section containing the target element\n* \\`references_bbox\\` - Optional array of bounding boxes for reference elements\n* \\`error\\` - Optional error message if the section cannot be found\n\nExample:\nIf the description is \"delete button on the second row with title 'Peter'\", return:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"references_bbox\": [[100, 100, 200, 200]]\n}\n\\`\\`\\`\n`;\n}\n\nexport const sectionLocatorInstruction = (sectionDescription: string) =>\n `Find section containing: ${sectionDescription}`;\n"],"names":["systemPromptToLocateSection","vlMode","bboxFormat","bboxDescription","sectionLocatorInstruction","sectionDescription"],"mappings":";AAGO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,aAAaC,gBAAgBF;IACnC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,WAAW;;;;;;;;;;;;;;;;;;;;;;AAsB7D,CAAC;AACD;AAEO,MAAME,4BAA4B,CAACC,qBACxC,CAAC,yBAAyB,EAAEA,oBAAoB"}
1	+ {"version":3,"file":"ai-model/prompt/llm-section-locator.mjs","sources":["../../../../src/ai-model/prompt/llm-section-locator.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\n\nexport function systemPromptToLocateSection(vlMode: TVlModeTypes \| undefined) {\n const bboxFormat = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Find a section containing the target element\n- If the description mentions reference elements, also locate sections containing those references\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxFormat}\n \"references_bbox\"?: [\n [number, number, number, number],\n ...\n ],\n \"error\"?: string\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` - Bounding box of the section containing the target element\n* \\`references_bbox\\` - Optional array of bounding boxes for reference elements\n* \\`error\\` - Optional error message if the section cannot be found\n\nExample:\nIf the description is \"delete button on the second row with title 'Peter'\", return:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"references_bbox\": [[100, 100, 200, 200]]\n}\n\\`\\`\\`\n`;\n}\n\nexport const sectionLocatorInstruction = (sectionDescription: string) =>\n `Find section containing: ${sectionDescription}`;\n"],"names":["systemPromptToLocateSection","vlMode","bboxFormat","bboxDescription","sectionLocatorInstruction","sectionDescription"],"mappings":";AAGO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,aAAaC,gBAAgBF;IACnC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,WAAW;;;;;;;;;;;;;;;;;;;;;;AAsB7D,CAAC;AACD;AAEO,MAAME,4BAA4B,CAACC,qBACxC,CAAC,yBAAyB,EAAEA,oBAAoB"}

package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/order-sensitive-judge.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/order-sensitive-judge.ts"],"sourcesContent":["export function systemPromptToJudgeOrderSensitive() {\n return `\n## Role:\nYou are an AI assistant that analyzes UI element descriptions.\n\n## Objective:\nDetermine whether a given element description is order-sensitive.\n\nOrder-sensitive descriptions contain phrases that specify position or sequence, such as:\n- \"the first button\"\n- \"the second item\"\n- \"the third row\"\n- \"the last input\"\n- \"the 5th element\"\n\nOrder-insensitive descriptions do not specify position:\n- \"login button\"\n- \"search input\"\n- \"submit button\"\n- \"user avatar\"\n\n## Output Format:\n\\`\\`\\`json\n{\n \"isOrderSensitive\": boolean\n}\n\\`\\`\\`\n\nReturn true if the description is order-sensitive, false otherwise.\n`;\n}\n\nexport const orderSensitiveJudgePrompt = (description: string) => {\n return `Analyze this element description: \"${description}\"`;\n};\n"],"names":["systemPromptToJudgeOrderSensitive","orderSensitiveJudgePrompt","description"],"mappings":"AAAO,SAASA;IACd,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BV,CAAC;AACD;AAEO,MAAMC,4BAA4B,CAACC,cACjC,CAAC,mCAAmC,EAAEA,YAAY,CAAC,CAAC"}
1	+ {"version":3,"file":"ai-model/prompt/order-sensitive-judge.mjs","sources":["../../../../src/ai-model/prompt/order-sensitive-judge.ts"],"sourcesContent":["export function systemPromptToJudgeOrderSensitive() {\n return `\n## Role:\nYou are an AI assistant that analyzes UI element descriptions.\n\n## Objective:\nDetermine whether a given element description is order-sensitive.\n\nOrder-sensitive descriptions contain phrases that specify position or sequence, such as:\n- \"the first button\"\n- \"the second item\"\n- \"the third row\"\n- \"the last input\"\n- \"the 5th element\"\n\nOrder-insensitive descriptions do not specify position:\n- \"login button\"\n- \"search input\"\n- \"submit button\"\n- \"user avatar\"\n\n## Output Format:\n\\`\\`\\`json\n{\n \"isOrderSensitive\": boolean\n}\n\\`\\`\\`\n\nReturn true if the description is order-sensitive, false otherwise.\n`;\n}\n\nexport const orderSensitiveJudgePrompt = (description: string) => {\n return `Analyze this element description: \"${description}\"`;\n};\n"],"names":["systemPromptToJudgeOrderSensitive","orderSensitiveJudgePrompt","description"],"mappings":"AAAO,SAASA;IACd,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BV,CAAC;AACD;AAEO,MAAMC,4BAA4B,CAACC,cACjC,CAAC,mCAAmC,EAAEA,YAAY,CAAC,CAAC"}

package/dist/es/ai-model/prompt/playwright-generator.mjs CHANGED Viewed

@@ -48,7 +48,7 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
         }
     ];
     const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
-    if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return response.content;
+    if (response?.content && 'string' == typeof response.content) return response.content;
     throw new Error('Failed to generate Playwright test code');
 };
 const generatePlaywrightTestStream = async (events, options, modelConfig)=>{
@@ -104,7 +104,7 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
     });
     {
         const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
-        if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return {
+        if (response?.content && 'string' == typeof response.content) return {
             content: response.content,
             usage: response.usage,
             isStreamed: false

package/dist/es/ai-model/prompt/playwright-generator.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/playwright-generator.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/playwright-generator.ts"],"sourcesContent":["import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { PLAYWRIGHT_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { AIActionType, callAI, callAIWithStringResponse } from '../index';\n// Import shared utilities and types from yaml-generator\nimport {\n type ChromeRecordedEvent,\n type EventCounts,\n type EventSummary,\n type InputDescription,\n type ProcessedEvent,\n createEventCounts,\n createMessageContent,\n extractInputDescriptions,\n filterEventsByType,\n getScreenshotsForLLM,\n prepareEventSummary,\n processEventsForLLM,\n validateEvents,\n} from './yaml-generator';\n\n// Playwright-specific interfaces\nexport interface PlaywrightGenerationOptions {\n testName?: string;\n includeScreenshots?: boolean;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n viewportSize?: { width: number; height: number };\n waitForNetworkIdle?: boolean;\n waitForNetworkIdleTimeout?: number;\n}\n\n// Re-export shared types for backward compatibility\nexport type {\n ChromeRecordedEvent,\n EventCounts,\n InputDescription,\n ProcessedEvent,\n EventSummary,\n};\n\n// Re-export shared utilities for backward compatibility\nexport {\n getScreenshotsForLLM,\n filterEventsByType,\n createEventCounts,\n extractInputDescriptions,\n processEventsForLLM,\n prepareEventSummary,\n createMessageContent,\n validateEvents,\n};\n\n/*\n Generates Playwright test code from recorded events\n /\nexport const generatePlaywrightTest = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout \|\| 2000,\n viewportSize: options.viewportSize \|\| { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots \|\| 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate Playwright test code');\n};\n\n/\n Generates Playwright test code from recorded events with streaming support\n */\nexport const generatePlaywrightTestStream = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout \|\| 2000,\n viewportSize: options.viewportSize \|\| { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots \|\| 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n8. can't wrap this test code in markdown code block\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code with streaming\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, AIActionType.TEXT, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate Playwright test code');\n }\n};\n"],"names":["generatePlaywrightTest","events","options","modelConfig","validateEvents","summary","prepareEventSummary","playwrightSummary","screenshots","getScreenshotsForLLM","promptText","JSON","messageContent","createMessageContent","systemPrompt","PLAYWRIGHT_EXAMPLE_CODE","prompt","response","callAIWithStringResponse","AIActionType","Error","generatePlaywrightTestStream","callAI"],"mappings":";;;AA6DO,MAAMA,yBAAyB,OACpCC,QACAC,SACAC;IAGAC,eAAeH;IAGf,MAAMI,UAAUC,oBAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,qBAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;8LAWiJ,CAAC;IAG7L,MAAMK,iBAAiBC,qBACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,yBAAyB;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,MAAMK,WAAW,MAAMC,yBACrBF,QACAG,aAAa,IAAI,EACjBhB;IAGF,IAAIc,AAAAA,CAAAA,QAAAA,WAAAA,KAAAA,IAAAA,SAAU,OAAO,AAAD,KAAK,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;IAGzB,MAAM,IAAIG,MAAM;AAClB;AAKO,MAAMC,+BAA+B,OAC1CpB,QACAC,SACAC;IAGAC,eAAeH;IAGf,MAAMI,UAAUC,oBAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,qBAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;;8LAYiJ,CAAC;IAG7L,MAAMK,iBAAiBC,qBACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,yBAAyB;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,IAAIV,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAMoB,OAAON,QAAQG,aAAa,IAAI,EAAEhB,aAAa;QAC1D,QAAQ;QACR,SAASD,QAAQ,OAAO;IAC1B;IACK;QAEL,MAAMe,WAAW,MAAMC,yBACrBF,QACAG,aAAa,IAAI,EACjBhB;QAGF,IAAIc,AAAAA,CAAAA,QAAAA,WAAAA,KAAAA,IAAAA,SAAU,OAAO,AAAD,KAAK,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;YACL,SAASA,SAAS,OAAO;YACzB,OAAOA,SAAS,KAAK;YACrB,YAAY;QACd;QAGF,MAAM,IAAIG,MAAM;IAClB;AACF"}
1	+ {"version":3,"file":"ai-model/prompt/playwright-generator.mjs","sources":["../../../../src/ai-model/prompt/playwright-generator.ts"],"sourcesContent":["import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { PLAYWRIGHT_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { AIActionType, callAI, callAIWithStringResponse } from '../index';\n// Import shared utilities and types from yaml-generator\nimport {\n type ChromeRecordedEvent,\n type EventCounts,\n type EventSummary,\n type InputDescription,\n type ProcessedEvent,\n createEventCounts,\n createMessageContent,\n extractInputDescriptions,\n filterEventsByType,\n getScreenshotsForLLM,\n prepareEventSummary,\n processEventsForLLM,\n validateEvents,\n} from './yaml-generator';\n\n// Playwright-specific interfaces\nexport interface PlaywrightGenerationOptions {\n testName?: string;\n includeScreenshots?: boolean;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n viewportSize?: { width: number; height: number };\n waitForNetworkIdle?: boolean;\n waitForNetworkIdleTimeout?: number;\n}\n\n// Re-export shared types for backward compatibility\nexport type {\n ChromeRecordedEvent,\n EventCounts,\n InputDescription,\n ProcessedEvent,\n EventSummary,\n};\n\n// Re-export shared utilities for backward compatibility\nexport {\n getScreenshotsForLLM,\n filterEventsByType,\n createEventCounts,\n extractInputDescriptions,\n processEventsForLLM,\n prepareEventSummary,\n createMessageContent,\n validateEvents,\n};\n\n/*\n Generates Playwright test code from recorded events\n /\nexport const generatePlaywrightTest = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout \|\| 2000,\n viewportSize: options.viewportSize \|\| { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots \|\| 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate Playwright test code');\n};\n\n/\n Generates Playwright test code from recorded events with streaming support\n */\nexport const generatePlaywrightTestStream = async (\n events: ChromeRecordedEvent[],\n options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add Playwright-specific options to summary\n const playwrightSummary = {\n ...summary,\n waitForNetworkIdle: options.waitForNetworkIdle !== false,\n waitForNetworkIdleTimeout: options.waitForNetworkIdleTimeout \|\| 2000,\n viewportSize: options.viewportSize \|\| { width: 1280, height: 800 },\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(events, options.maxScreenshots \|\| 3);\n\n // Create prompt text\n const promptText = `Generate a Playwright test using @midscene/web/playwright that reproduces this recorded browser session. The test should be based on the following events and follow the structure of the example provided. Make the test descriptive with appropriate assertions and validations.\n\nEvent Summary:\n${JSON.stringify(playwrightSummary, null, 2)}\n\nGenerated code should:\n1. Import required dependencies\n2. Set up the test with proper configuration\n3. Include a beforeEach hook to navigate to the starting URL\n4. Implement a test that uses Midscene AI methods (aiTap, aiInput, aiAssert, etc.)\n5. Include appropriate assertions and validations\n6. Follow best practices for Playwright tests\n7. Be ready to execute without further modification\n8. can't wrap this test code in markdown code block\n\nImportant: Return ONLY the raw Playwright test code. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`typescript, \\`\\`\\`javascript or \\`\\`\\`). Start directly with the code content.`;\n\n // Create message content with screenshots\n const messageContent = createMessageContent(\n promptText,\n screenshots,\n options.includeScreenshots !== false,\n );\n\n // Create system prompt\n const systemPrompt = `You are an expert test automation engineer specializing in Playwright and Midscene. \nYour task is to generate a complete, executable Playwright test using @midscene/web/playwright that reproduces a recorded browser session.\n\n${PLAYWRIGHT_EXAMPLE_CODE}`;\n\n // Use LLM to generate the Playwright test code with streaming\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: systemPrompt,\n },\n {\n role: 'user',\n content: messageContent,\n },\n ];\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, AIActionType.TEXT, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate Playwright test code');\n }\n};\n"],"names":["generatePlaywrightTest","events","options","modelConfig","validateEvents","summary","prepareEventSummary","playwrightSummary","screenshots","getScreenshotsForLLM","promptText","JSON","messageContent","createMessageContent","systemPrompt","PLAYWRIGHT_EXAMPLE_CODE","prompt","response","callAIWithStringResponse","AIActionType","Error","generatePlaywrightTestStream","callAI"],"mappings":";;;AA6DO,MAAMA,yBAAyB,OACpCC,QACAC,SACAC;IAGAC,eAAeH;IAGf,MAAMI,UAAUC,oBAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,qBAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;8LAWiJ,CAAC;IAG7L,MAAMK,iBAAiBC,qBACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,yBAAyB;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,MAAMK,WAAW,MAAMC,yBACrBF,QACAG,aAAa,IAAI,EACjBhB;IAGF,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;IAGzB,MAAM,IAAIG,MAAM;AAClB;AAKO,MAAMC,+BAA+B,OAC1CpB,QACAC,SACAC;IAGAC,eAAeH;IAGf,MAAMI,UAAUC,oBAAoBL,QAAQ;QAC1C,UAAUC,QAAQ,QAAQ;QAC1B,gBAAgBA,QAAQ,cAAc,IAAI;IAC5C;IAGA,MAAMK,oBAAoB;QACxB,GAAGF,OAAO;QACV,oBAAoBH,AAA+B,UAA/BA,QAAQ,kBAAkB;QAC9C,2BAA2BA,QAAQ,yBAAyB,IAAI;QAChE,cAAcA,QAAQ,YAAY,IAAI;YAAE,OAAO;YAAM,QAAQ;QAAI;IACnE;IAGA,MAAMM,cAAcC,qBAAqBR,QAAQC,QAAQ,cAAc,IAAI;IAG3E,MAAMQ,aAAa,CAAC;;;AAGtB,EAAEC,KAAK,SAAS,CAACJ,mBAAmB,MAAM,GAAG;;;;;;;;;;;;8LAYiJ,CAAC;IAG7L,MAAMK,iBAAiBC,qBACrBH,YACAF,aACAN,AAA+B,UAA/BA,QAAQ,kBAAkB;IAI5B,MAAMY,eAAe,CAAC;;;AAGxB,EAAEC,yBAAyB;IAGzB,MAAMC,SAAuC;QAC3C;YACE,MAAM;YACN,SAASF;QACX;QACA;YACE,MAAM;YACN,SAASF;QACX;KACD;IAED,IAAIV,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAMoB,OAAON,QAAQG,aAAa,IAAI,EAAEhB,aAAa;QAC1D,QAAQ;QACR,SAASD,QAAQ,OAAO;IAC1B;IACK;QAEL,MAAMe,WAAW,MAAMC,yBACrBF,QACAG,aAAa,IAAI,EACjBhB;QAGF,IAAIc,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;YACL,SAASA,SAAS,OAAO;YACzB,OAAOA,SAAS,KAAK;YACrB,YAAY;QACd;QAGF,MAAM,IAAIG,MAAM;IAClB;AACF"}

package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/ui-tars-locator.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/ui-tars-locator.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\n// claude 3.5 sonnet computer The ability to understand the content of the image is better, Does not provide element snapshot effect\nexport function systemPromptToLocateElementPosition() {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. \n\n## Output Format\n\\`\\`\\`\nThought: ...\nAction: ...\n\\`\\`\\`\n\n## Action Space\nclick(start_box='[x1, y1, x2, y2]')\nleft_double(start_box='[x1, y1, x2, y2]')\nright_single(start_box='[x1, y1, x2, y2]')\ndrag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')\nhotkey(key='')\ntype(content='') #If you want to submit your input, use \"\\\\n\" at the end of \\`content\\`.\nscroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')\nwait() #Sleep for 5s and take a screenshot to check for any changes.\nfinished()\ncall_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.\n\n## Note\n- Use ${preferredLanguage} in \\`Thought\\` part.\n- Write a small plan and finally summarize your next action (with its target element) in one sentence in \\`Thought\\` part.\n\n## User Instruction\n `;\n}\n"],"names":["systemPromptToLocateElementPosition","preferredLanguage","getPreferredLanguage"],"mappings":";AAGO,SAASA;IACd,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;MAsBJ,EAAED,kBAAkB;;;;IAItB,CAAC;AACL"}
1	+ {"version":3,"file":"ai-model/prompt/ui-tars-locator.mjs","sources":["../../../../src/ai-model/prompt/ui-tars-locator.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\n// claude 3.5 sonnet computer The ability to understand the content of the image is better, Does not provide element snapshot effect\nexport function systemPromptToLocateElementPosition() {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. \n\n## Output Format\n\\`\\`\\`\nThought: ...\nAction: ...\n\\`\\`\\`\n\n## Action Space\nclick(start_box='[x1, y1, x2, y2]')\nleft_double(start_box='[x1, y1, x2, y2]')\nright_single(start_box='[x1, y1, x2, y2]')\ndrag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')\nhotkey(key='')\ntype(content='') #If you want to submit your input, use \"\\\\n\" at the end of \\`content\\`.\nscroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')\nwait() #Sleep for 5s and take a screenshot to check for any changes.\nfinished()\ncall_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.\n\n## Note\n- Use ${preferredLanguage} in \\`Thought\\` part.\n- Write a small plan and finally summarize your next action (with its target element) in one sentence in \\`Thought\\` part.\n\n## User Instruction\n `;\n}\n"],"names":["systemPromptToLocateElementPosition","preferredLanguage","getPreferredLanguage"],"mappings":";AAGO,SAASA;IACd,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;MAsBJ,EAAED,kBAAkB;;;;IAItB,CAAC;AACL"}

package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/ui-tars-planning.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/ui-tars-planning.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport function getUiTarsPlanningPrompt(): string {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. \n\n## Output Format\n\\`\\`\\`\nThought: ...\nAction: ...\n\\`\\`\\`\n\n## Action Space\n\nclick(start_box='[x1, y1, x2, y2]')\nleft_double(start_box='[x1, y1, x2, y2]')\nright_single(start_box='[x1, y1, x2, y2]')\ndrag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')\nhotkey(key='')\ntype(content='xxx') # Use escape characters \\\\', \\\\\\\", and \\\\n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \\\\n at the end of content. \nscroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')\nwait() #Sleep for 5s and take a screenshot to check for any changes.\nfinished(content='xxx') # Use escape characters \\\\', \\\\\", and \\\\n in content part to ensure we can parse the content in normal python string format.\n\n\n## Note\n- Use ${preferredLanguage} in \\`Thought\\` part.\n- Write a small plan and finally summarize your next action (with its target element) in one sentence in \\`Thought\\` part.\n\n## User Instruction\n`;\n}\n\nexport const getSummary = (prediction: string) =>\n prediction\n .replace(/Reflection:[\\s\\S]*?(?=Action_Summary:\|Action:\|$)/g, '')\n .trim();\n"],"names":["getUiTarsPlanningPrompt","preferredLanguage","getPreferredLanguage","getSummary","prediction"],"mappings":";AAEO,SAASA;IACd,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;MAuBJ,EAAED,kBAAkB;;;;AAI1B,CAAC;AACD;AAEO,MAAME,aAAa,CAACC,aACzBA,WACG,OAAO,CAAC,qDAAqD,IAC7D,IAAI"}
1	+ {"version":3,"file":"ai-model/prompt/ui-tars-planning.mjs","sources":["../../../../src/ai-model/prompt/ui-tars-planning.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport function getUiTarsPlanningPrompt(): string {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task. \n\n## Output Format\n\\`\\`\\`\nThought: ...\nAction: ...\n\\`\\`\\`\n\n## Action Space\n\nclick(start_box='[x1, y1, x2, y2]')\nleft_double(start_box='[x1, y1, x2, y2]')\nright_single(start_box='[x1, y1, x2, y2]')\ndrag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')\nhotkey(key='')\ntype(content='xxx') # Use escape characters \\\\', \\\\\\\", and \\\\n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \\\\n at the end of content. \nscroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')\nwait() #Sleep for 5s and take a screenshot to check for any changes.\nfinished(content='xxx') # Use escape characters \\\\', \\\\\", and \\\\n in content part to ensure we can parse the content in normal python string format.\n\n\n## Note\n- Use ${preferredLanguage} in \\`Thought\\` part.\n- Write a small plan and finally summarize your next action (with its target element) in one sentence in \\`Thought\\` part.\n\n## User Instruction\n`;\n}\n\nexport const getSummary = (prediction: string) =>\n prediction\n .replace(/Reflection:[\\s\\S]*?(?=Action_Summary:\|Action:\|$)/g, '')\n .trim();\n"],"names":["getUiTarsPlanningPrompt","preferredLanguage","getPreferredLanguage","getSummary","prediction"],"mappings":";AAEO,SAASA;IACd,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;MAuBJ,EAAED,kBAAkB;;;;AAI1B,CAAC;AACD;AAEO,MAAME,aAAa,CAACC,aACzBA,WACG,OAAO,CAAC,qDAAqD,IAC7D,IAAI"}

package/dist/es/ai-model/prompt/util.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/util.mjs","sources":["~~webpack://@midscene/core/./~~src/ai-model/prompt/util.ts"],"sourcesContent":["import type { BaseElement, ElementTreeNode, Size, UIContext } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { assert } from '@midscene/shared/utils';\n\nexport function describeSize(size: Size) {\n return `${size.width} x ${size.height}`;\n}\n\nexport function describeElement(\n elements: (Pick<BaseElement, 'rect' \| 'content'> & { id: string })[],\n) {\n const sliceLength = 80;\n return elements\n .map((item) =>\n [\n item.id,\n item.rect.left,\n item.rect.top,\n item.rect.left + item.rect.width,\n item.rect.top + item.rect.height,\n item.content.length > sliceLength\n ? `${item.content.slice(0, sliceLength)}...`\n : item.content,\n ].join(', '),\n )\n .join('\\n');\n}\nexport const distanceThreshold = 16;\n\n// export function elementByPositionWithElementInfo(\n// treeRoot: ElementTreeNode<BaseElement>,\n// position: {\n// x: number;\n// y: number;\n// },\n// options?: {\n// requireStrictDistance?: boolean;\n// filterPositionElements?: boolean;\n// },\n// ) {\n// const requireStrictDistance = options?.requireStrictDistance ?? true;\n// const filterPositionElements = options?.filterPositionElements ?? false;\n\n// assert(typeof position !== 'undefined', 'position is required for query');\n\n// const matchingElements: BaseElement[] = [];\n\n// function dfs(node: ElementTreeNode<BaseElement>) {\n// if (node?.node) {\n// const item = node.node;\n// if (\n// item.rect.left <= position.x &&\n// position.x <= item.rect.left + item.rect.width &&\n// item.rect.top <= position.y &&\n// position.y <= item.rect.top + item.rect.height\n// ) {\n// if (\n// !(\n// filterPositionElements &&\n// item.attributes?.nodeType === NodeType.POSITION\n// ) &&\n// item.isVisible\n// ) {\n// matchingElements.push(item);\n// }\n// }\n// }\n\n// for (const child of node.children) {\n// dfs(child);\n// }\n// }\n\n// dfs(treeRoot);\n\n// if (matchingElements.length === 0) {\n// return undefined;\n// }\n\n// // Find the smallest element by area\n// const element = matchingElements.reduce((smallest, current) => {\n// const smallestArea = smallest.rect.width * smallest.rect.height;\n// const currentArea = current.rect.width * current.rect.height;\n// return currentArea < smallestArea ? current : smallest;\n// });\n\n// const distanceToCenter = distance(\n// { x: element.center[0], y: element.center[1] },\n// position,\n// );\n\n// if (requireStrictDistance) {\n// return distanceToCenter <= distanceThreshold ? element : undefined;\n// }\n\n// return element;\n// }\n\nexport function distance(\n point1: { x: number; y: number },\n point2: { x: number; y: number },\n) {\n return Math.sqrt((point1.x - point2.x) 2 + (point1.y - point2.y) 2);\n}\n\nexport const samplePageDescription = `\nAnd the page is described as follows:\n====================\nThe size of the page: 1280 x 720\nSome of the elements are marked with a rectangle in the screenshot corresponding to the markerId, some are not.\n\nDescription of all the elements in screenshot:\n<div id=\"969f1637\" markerId=\"1\" left=\"100\" top=\"100\" width=\"100\" height=\"100\"> // The markerId indicated by the rectangle label in the screenshot\n <h4 id=\"b211ecb2\" markerId=\"5\" left=\"150\" top=\"150\" width=\"90\" height=\"60\">\n The username is accepted\n </h4>\n ...many more\n</div>\n====================\n`;\n\nexport async function describeUserPage(context: UIContext) {\n return `The size of the page: ${describeSize(context.size)}`;\n}\n"],"names":["describeSize","size","describeElement","elements","sliceLength","item","distanceThreshold","distance","point1","point2","Math","samplePageDescription","describeUserPage","context"],"mappings":"AAIO,SAASA,aAAaC,IAAU;IACrC,OAAO,GAAGA,KAAK,KAAK,CAAC,GAAG,EAAEA,KAAK,MAAM,EAAE;AACzC;AAEO,SAASC,gBACdC,QAAoE;IAEpE,MAAMC,cAAc;IACpB,OAAOD,SACJ,GAAG,CAAC,CAACE,OACJ;YACEA,KAAK,EAAE;YACPA,KAAK,IAAI,CAAC,IAAI;YACdA,KAAK,IAAI,CAAC,GAAG;YACbA,KAAK,IAAI,CAAC,IAAI,GAAGA,KAAK,IAAI,CAAC,KAAK;YAChCA,KAAK,IAAI,CAAC,GAAG,GAAGA,KAAK,IAAI,CAAC,MAAM;YAChCA,KAAK,OAAO,CAAC,MAAM,GAAGD,cAClB,GAAGC,KAAK,OAAO,CAAC,KAAK,CAAC,GAAGD,aAAa,GAAG,CAAC,GAC1CC,KAAK,OAAO;SACjB,CAAC,IAAI,CAAC,OAER,IAAI,CAAC;AACV;AACO,MAAMC,oBAAoB;AAuE1B,SAASC,SACdC,MAAgC,EAChCC,MAAgC;IAEhC,OAAOC,KAAK,IAAI,CAAEF,AAAAA,CAAAA,OAAO,CAAC,GAAGC,OAAO,CAAC,AAAD,KAAM,IAAKD,AAAAA,CAAAA,OAAO,CAAC,GAAGC,OAAO,CAAC,AAAD,KAAM;AACzE;AAEO,MAAME,wBAAwB,CAAC;;;;;;;;;;;;;;AActC,CAAC;AAEM,eAAeC,iBAAiBC,OAAkB;IACvD,OAAO,CAAC,sBAAsB,EAAEb,aAAaa,QAAQ,IAAI,GAAG;AAC9D"}
1	+ {"version":3,"file":"ai-model/prompt/util.mjs","sources":["../../../../src/ai-model/prompt/util.ts"],"sourcesContent":["import type { BaseElement, ElementTreeNode, Size, UIContext } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { assert } from '@midscene/shared/utils';\n\nexport function describeSize(size: Size) {\n return `${size.width} x ${size.height}`;\n}\n\nexport function describeElement(\n elements: (Pick<BaseElement, 'rect' \| 'content'> & { id: string })[],\n) {\n const sliceLength = 80;\n return elements\n .map((item) =>\n [\n item.id,\n item.rect.left,\n item.rect.top,\n item.rect.left + item.rect.width,\n item.rect.top + item.rect.height,\n item.content.length > sliceLength\n ? `${item.content.slice(0, sliceLength)}...`\n : item.content,\n ].join(', '),\n )\n .join('\\n');\n}\nexport const distanceThreshold = 16;\n\n// export function elementByPositionWithElementInfo(\n// treeRoot: ElementTreeNode<BaseElement>,\n// position: {\n// x: number;\n// y: number;\n// },\n// options?: {\n// requireStrictDistance?: boolean;\n// filterPositionElements?: boolean;\n// },\n// ) {\n// const requireStrictDistance = options?.requireStrictDistance ?? true;\n// const filterPositionElements = options?.filterPositionElements ?? false;\n\n// assert(typeof position !== 'undefined', 'position is required for query');\n\n// const matchingElements: BaseElement[] = [];\n\n// function dfs(node: ElementTreeNode<BaseElement>) {\n// if (node?.node) {\n// const item = node.node;\n// if (\n// item.rect.left <= position.x &&\n// position.x <= item.rect.left + item.rect.width &&\n// item.rect.top <= position.y &&\n// position.y <= item.rect.top + item.rect.height\n// ) {\n// if (\n// !(\n// filterPositionElements &&\n// item.attributes?.nodeType === NodeType.POSITION\n// ) &&\n// item.isVisible\n// ) {\n// matchingElements.push(item);\n// }\n// }\n// }\n\n// for (const child of node.children) {\n// dfs(child);\n// }\n// }\n\n// dfs(treeRoot);\n\n// if (matchingElements.length === 0) {\n// return undefined;\n// }\n\n// // Find the smallest element by area\n// const element = matchingElements.reduce((smallest, current) => {\n// const smallestArea = smallest.rect.width * smallest.rect.height;\n// const currentArea = current.rect.width * current.rect.height;\n// return currentArea < smallestArea ? current : smallest;\n// });\n\n// const distanceToCenter = distance(\n// { x: element.center[0], y: element.center[1] },\n// position,\n// );\n\n// if (requireStrictDistance) {\n// return distanceToCenter <= distanceThreshold ? element : undefined;\n// }\n\n// return element;\n// }\n\nexport function distance(\n point1: { x: number; y: number },\n point2: { x: number; y: number },\n) {\n return Math.sqrt((point1.x - point2.x) 2 + (point1.y - point2.y) 2);\n}\n\nexport const samplePageDescription = `\nAnd the page is described as follows:\n====================\nThe size of the page: 1280 x 720\nSome of the elements are marked with a rectangle in the screenshot corresponding to the markerId, some are not.\n\nDescription of all the elements in screenshot:\n<div id=\"969f1637\" markerId=\"1\" left=\"100\" top=\"100\" width=\"100\" height=\"100\"> // The markerId indicated by the rectangle label in the screenshot\n <h4 id=\"b211ecb2\" markerId=\"5\" left=\"150\" top=\"150\" width=\"90\" height=\"60\">\n The username is accepted\n </h4>\n ...many more\n</div>\n====================\n`;\n\nexport async function describeUserPage(context: UIContext) {\n return `The size of the page: ${describeSize(context.size)}`;\n}\n"],"names":["describeSize","size","describeElement","elements","sliceLength","item","distanceThreshold","distance","point1","point2","Math","samplePageDescription","describeUserPage","context"],"mappings":"AAIO,SAASA,aAAaC,IAAU;IACrC,OAAO,GAAGA,KAAK,KAAK,CAAC,GAAG,EAAEA,KAAK,MAAM,EAAE;AACzC;AAEO,SAASC,gBACdC,QAAoE;IAEpE,MAAMC,cAAc;IACpB,OAAOD,SACJ,GAAG,CAAC,CAACE,OACJ;YACEA,KAAK,EAAE;YACPA,KAAK,IAAI,CAAC,IAAI;YACdA,KAAK,IAAI,CAAC,GAAG;YACbA,KAAK,IAAI,CAAC,IAAI,GAAGA,KAAK,IAAI,CAAC,KAAK;YAChCA,KAAK,IAAI,CAAC,GAAG,GAAGA,KAAK,IAAI,CAAC,MAAM;YAChCA,KAAK,OAAO,CAAC,MAAM,GAAGD,cAClB,GAAGC,KAAK,OAAO,CAAC,KAAK,CAAC,GAAGD,aAAa,GAAG,CAAC,GAC1CC,KAAK,OAAO;SACjB,CAAC,IAAI,CAAC,OAER,IAAI,CAAC;AACV;AACO,MAAMC,oBAAoB;AAuE1B,SAASC,SACdC,MAAgC,EAChCC,MAAgC;IAEhC,OAAOC,KAAK,IAAI,CAAEF,AAAAA,CAAAA,OAAO,CAAC,GAAGC,OAAO,CAAC,AAAD,KAAM,IAAKD,AAAAA,CAAAA,OAAO,CAAC,GAAGC,OAAO,CAAC,AAAD,KAAM;AACzE;AAEO,MAAME,wBAAwB,CAAC;;;;;;;;;;;;;;AActC,CAAC;AAEM,eAAeC,iBAAiBC,OAAkB;IACvD,OAAO,CAAC,sBAAsB,EAAEb,aAAaa,QAAQ,IAAI,GAAG;AAC9D"}

package/dist/es/ai-model/prompt/yaml-generator.mjs CHANGED Viewed

@@ -117,10 +117,10 @@ Event Summary:
 ${JSON.stringify(yamlSummary, null, 2)}
 Convert events:
-- navigation \u{2192} target.url
-- click \u{2192} aiTap with element description
-- input \u{2192} aiInput with value and locate
-- scroll \u{2192} aiScroll with appropriate direction
+- navigation → target.url
+- click → aiTap with element description
+- input → aiInput with value and locate
+- scroll → aiScroll with appropriate direction
 - Add aiAssert for important state changes
 Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \`\`\`yaml or \`\`\`). Start directly with the YAML content.`
@@ -142,7 +142,7 @@ Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdow
             });
         }
         const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
-        if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return response.content;
+        if (response?.content && 'string' == typeof response.content) return response.content;
         throw new Error('Failed to generate YAML test configuration');
     } catch (error) {
         throw new Error(`Failed to generate YAML test: ${error}`);
@@ -173,10 +173,10 @@ Event Summary:
 ${JSON.stringify(yamlSummary, null, 2)}
 Convert events:
-- navigation \u{2192} target.url
-- click \u{2192} aiTap with element description
-- input \u{2192} aiInput with value and locate
-- scroll \u{2192} aiScroll with appropriate direction
+- navigation → target.url
+- click → aiTap with element description
+- input → aiInput with value and locate
+- scroll → aiScroll with appropriate direction
 - Add aiAssert for important state changes
 Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \`\`\`yaml or \`\`\`). Start directly with the YAML content.`
@@ -203,7 +203,7 @@ Important: Return ONLY the raw YAML content. Do NOT wrap the response in markdow
         });
         {
             const response = await callAIWithStringResponse(prompt, AIActionType.TEXT, modelConfig);
-            if ((null == response ? void 0 : response.content) && 'string' == typeof response.content) return {
+            if (response?.content && 'string' == typeof response.content) return {
                 content: response.content,
                 usage: response.usage,
                 isStreamed: false

package/dist/es/ai-model/prompt/yaml-generator.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/yaml-generator.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/yaml-generator.ts"],"sourcesContent":["import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { YAML_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n AIActionType,\n type ChatCompletionMessageParam,\n callAI,\n callAIWithStringResponse,\n} from '../index';\n\n// Common interfaces for test generation (shared between YAML and Playwright)\nexport interface EventCounts {\n navigation: number;\n click: number;\n input: number;\n scroll: number;\n total: number;\n}\n\nexport interface InputDescription {\n description: string;\n value: string;\n}\n\nexport interface ProcessedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n}\n\nexport interface EventSummary {\n testName: string;\n startUrl: string;\n eventCounts: EventCounts;\n urls: string[];\n clickDescriptions: string[];\n inputDescriptions: InputDescription[];\n events: ProcessedEvent[];\n}\n\n// Common ChromeRecordedEvent interface\nexport interface ChromeRecordedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n screenshotBefore?: string;\n screenshotAfter?: string;\n screenshotWithBox?: string;\n}\n\nexport interface YamlGenerationOptions {\n testName?: string;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n}\n\nexport interface FilteredEvents {\n navigationEvents: ChromeRecordedEvent[];\n clickEvents: ChromeRecordedEvent[];\n inputEvents: ChromeRecordedEvent[];\n scrollEvents: ChromeRecordedEvent[];\n}\n\n// Common utility functions (shared between YAML and Playwright generators)\n\n/*\n Get screenshots from events for LLM context\n /\nexport const getScreenshotsForLLM = (\n events: ChromeRecordedEvent[],\n maxScreenshots = 1,\n): string[] => {\n // Find events with screenshots, prioritizing navigation and click events\n const eventsWithScreenshots = events.filter(\n (event) =>\n event.screenshotBefore \|\|\n event.screenshotAfter \|\|\n event.screenshotWithBox,\n );\n\n // Sort them by priority (navigation first, then clicks, then others)\n const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {\n if (a.type === 'navigation' && b.type !== 'navigation') return -1;\n if (a.type !== 'navigation' && b.type === 'navigation') return 1;\n if (a.type === 'click' && b.type !== 'click') return -1;\n if (a.type !== 'click' && b.type === 'click') return 1;\n return 0;\n });\n\n // Extract up to maxScreenshots screenshots\n const screenshots: string[] = [];\n for (const event of sortedEvents) {\n // Prefer the most informative screenshot\n const screenshot =\n event.screenshotWithBox \|\|\n event.screenshotAfter \|\|\n event.screenshotBefore;\n if (screenshot && !screenshots.includes(screenshot)) {\n screenshots.push(screenshot);\n if (screenshots.length >= maxScreenshots) break;\n }\n }\n\n return screenshots;\n};\n\n/\n Filter events by type for easier processing\n /\nexport const filterEventsByType = (\n events: ChromeRecordedEvent[],\n): FilteredEvents => {\n return {\n navigationEvents: events.filter((event) => event.type === 'navigation'),\n clickEvents: events.filter((event) => event.type === 'click'),\n inputEvents: events.filter((event) => event.type === 'input'),\n scrollEvents: events.filter((event) => event.type === 'scroll'),\n };\n};\n\n/\n Create event counts summary\n /\nexport const createEventCounts = (\n filteredEvents: FilteredEvents,\n totalEvents: number,\n): EventCounts => {\n return {\n navigation: filteredEvents.navigationEvents.length,\n click: filteredEvents.clickEvents.length,\n input: filteredEvents.inputEvents.length,\n scroll: filteredEvents.scrollEvents.length,\n total: totalEvents,\n };\n};\n\n/\n Extract input descriptions from input events\n /\nexport const extractInputDescriptions = (\n inputEvents: ChromeRecordedEvent[],\n): InputDescription[] => {\n return inputEvents\n .map((event) => ({\n description: event.elementDescription \|\| '',\n value: event.value \|\| '',\n }))\n .filter((item) => item.description && item.value);\n};\n\n/\n Process events for LLM consumption\n /\nexport const processEventsForLLM = (\n events: ChromeRecordedEvent[],\n): ProcessedEvent[] => {\n return events.map((event) => ({\n type: event.type,\n timestamp: event.timestamp,\n url: event.url,\n title: event.title,\n elementDescription: event.elementDescription,\n value: event.value,\n pageInfo: event.pageInfo,\n elementRect: event.elementRect,\n }));\n};\n\n/\n Prepare comprehensive event summary for LLM\n /\nexport const prepareEventSummary = (\n events: ChromeRecordedEvent[],\n options: { testName?: string; maxScreenshots?: number } = {},\n): EventSummary => {\n const filteredEvents = filterEventsByType(events);\n const eventCounts = createEventCounts(filteredEvents, events.length);\n\n // Extract useful information from events\n const startUrl =\n filteredEvents.navigationEvents.length > 0\n ? filteredEvents.navigationEvents[0].url \|\| ''\n : '';\n\n const clickDescriptions = filteredEvents.clickEvents\n .map((event) => event.elementDescription)\n .filter((desc): desc is string => Boolean(desc))\n .slice(0, 10);\n\n const inputDescriptions = extractInputDescriptions(\n filteredEvents.inputEvents,\n ).slice(0, 10);\n\n const urls = filteredEvents.navigationEvents\n .map((e) => e.url)\n .filter((url): url is string => Boolean(url))\n .slice(0, 5);\n\n const processedEvents = processEventsForLLM(events);\n\n return {\n testName: options.testName \|\| 'Automated test from recorded events',\n startUrl,\n eventCounts,\n urls,\n clickDescriptions,\n inputDescriptions,\n events: processedEvents,\n };\n};\n\n/\n Create message content for LLM with optional screenshots\n /\nexport const createMessageContent = (\n promptText: string,\n screenshots: string[] = [],\n includeScreenshots = true,\n) => {\n const messageContent: any[] = [\n {\n type: 'text',\n text: promptText,\n },\n ];\n\n // Add screenshots if available and requested\n if (includeScreenshots && screenshots.length > 0) {\n messageContent.unshift({\n type: 'text',\n text: 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n screenshots.forEach((screenshot) => {\n messageContent.push({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n });\n });\n }\n\n return messageContent;\n};\n\n/\n Validate events before processing\n /\nexport const validateEvents = (events: ChromeRecordedEvent[]): void => {\n if (!events.length) {\n throw new Error('No events provided for test generation');\n }\n};\n\n// YAML-specific generation functions\n\n/\n Generates YAML test configuration from recorded events using AI\n /\nexport const generateYamlTest = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps \|\| false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots \|\| 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate YAML test configuration');\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n\n/\n Generates YAML test configuration from recorded events using AI with streaming support\n */\nexport const generateYamlTestStream = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps \|\| false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots \|\| 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, AIActionType.TEXT, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate YAML test configuration');\n }\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n"],"names":["getScreenshotsForLLM","events","maxScreenshots","eventsWithScreenshots","event","sortedEvents","a","b","screenshots","screenshot","filterEventsByType","createEventCounts","filteredEvents","totalEvents","extractInputDescriptions","inputEvents","item","processEventsForLLM","prepareEventSummary","options","eventCounts","startUrl","clickDescriptions","desc","Boolean","inputDescriptions","urls","e","url","processedEvents","createMessageContent","promptText","includeScreenshots","messageContent","validateEvents","Error","generateYamlTest","modelConfig","summary","yamlSummary","prompt","YAML_EXAMPLE_CODE","JSON","response","callAIWithStringResponse","AIActionType","error","generateYamlTestStream","callAI"],"mappings":";;AAkFO,MAAMA,uBAAuB,CAClCC,QACAC,iBAAiB,CAAC;IAGlB,MAAMC,wBAAwBF,OAAO,MAAM,CACzC,CAACG,QACCA,MAAM,gBAAgB,IACtBA,MAAM,eAAe,IACrBA,MAAM,iBAAiB;IAI3B,MAAMC,eAAe;WAAIF;KAAsB,CAAC,IAAI,CAAC,CAACG,GAAGC;QACvD,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,OAAO;IACT;IAGA,MAAMC,cAAwB,EAAE;IAChC,KAAK,MAAMJ,SAASC,aAAc;QAEhC,MAAMI,aACJL,MAAM,iBAAiB,IACvBA,MAAM,eAAe,IACrBA,MAAM,gBAAgB;QACxB,IAAIK,cAAc,CAACD,YAAY,QAAQ,CAACC,aAAa;YACnDD,YAAY,IAAI,CAACC;YACjB,IAAID,YAAY,MAAM,IAAIN,gBAAgB;QAC5C;IACF;IAEA,OAAOM;AACT;AAKO,MAAME,qBAAqB,CAChCT,SAEO;QACL,kBAAkBA,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,iBAAfA,MAAM,IAAI;QACrD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,cAAcH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,aAAfA,MAAM,IAAI;IACnD;AAMK,MAAMO,oBAAoB,CAC/BC,gBACAC,cAEO;QACL,YAAYD,eAAe,gBAAgB,CAAC,MAAM;QAClD,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,QAAQA,eAAe,YAAY,CAAC,MAAM;QAC1C,OAAOC;IACT;AAMK,MAAMC,2BAA2B,CACtCC,cAEOA,YACJ,GAAG,CAAC,CAACX,QAAW;YACf,aAAaA,MAAM,kBAAkB,IAAI;YACzC,OAAOA,MAAM,KAAK,IAAI;QACxB,IACC,MAAM,CAAC,CAACY,OAASA,KAAK,WAAW,IAAIA,KAAK,KAAK;AAM7C,MAAMC,sBAAsB,CACjChB,SAEOA,OAAO,GAAG,CAAC,CAACG,QAAW;YAC5B,MAAMA,MAAM,IAAI;YAChB,WAAWA,MAAM,SAAS;YAC1B,KAAKA,MAAM,GAAG;YACd,OAAOA,MAAM,KAAK;YAClB,oBAAoBA,MAAM,kBAAkB;YAC5C,OAAOA,MAAM,KAAK;YAClB,UAAUA,MAAM,QAAQ;YACxB,aAAaA,MAAM,WAAW;QAChC;AAMK,MAAMc,sBAAsB,CACjCjB,QACAkB,UAA0D,CAAC,CAAC;IAE5D,MAAMP,iBAAiBF,mBAAmBT;IAC1C,MAAMmB,cAAcT,kBAAkBC,gBAAgBX,OAAO,MAAM;IAGnE,MAAMoB,WACJT,eAAe,gBAAgB,CAAC,MAAM,GAAG,IACrCA,eAAe,gBAAgB,CAAC,EAAE,CAAC,GAAG,IAAI,KAC1C;IAEN,MAAMU,oBAAoBV,eAAe,WAAW,CACjD,GAAG,CAAC,CAACR,QAAUA,MAAM,kBAAkB,EACvC,MAAM,CAAC,CAACmB,OAAyBC,QAAQD,OACzC,KAAK,CAAC,GAAG;IAEZ,MAAME,oBAAoBX,yBACxBF,eAAe,WAAW,EAC1B,KAAK,CAAC,GAAG;IAEX,MAAMc,OAAOd,eAAe,gBAAgB,CACzC,GAAG,CAAC,CAACe,IAAMA,EAAE,GAAG,EAChB,MAAM,CAAC,CAACC,MAAuBJ,QAAQI,MACvC,KAAK,CAAC,GAAG;IAEZ,MAAMC,kBAAkBZ,oBAAoBhB;IAE5C,OAAO;QACL,UAAUkB,QAAQ,QAAQ,IAAI;QAC9BE;QACAD;QACAM;QACAJ;QACAG;QACA,QAAQI;IACV;AACF;AAKO,MAAMC,uBAAuB,CAClCC,YACAvB,cAAwB,EAAE,EAC1BwB,qBAAqB,IAAI;IAEzB,MAAMC,iBAAwB;QAC5B;YACE,MAAM;YACN,MAAMF;QACR;KACD;IAGD,IAAIC,sBAAsBxB,YAAY,MAAM,GAAG,GAAG;QAChDyB,eAAe,OAAO,CAAC;YACrB,MAAM;YACN,MAAM;QACR;QAEAzB,YAAY,OAAO,CAAC,CAACC;YACnBwB,eAAe,IAAI,CAAC;gBAClB,MAAM;gBACN,WAAW;oBACT,KAAKxB;gBACP;YACF;QACF;IACF;IAEA,OAAOwB;AACT;AAKO,MAAMC,iBAAiB,CAACjC;IAC7B,IAAI,CAACA,OAAO,MAAM,EAChB,MAAM,IAAIkC,MAAM;AAEpB;AAOO,MAAMC,mBAAmB,OAC9BnC,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,mBAAmB;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,MAAMkC,WAAW,MAAMC,yBACrBJ,QACAK,aAAa,IAAI,EACjBR;QAGF,IAAIM,AAAAA,CAAAA,QAAAA,WAAAA,KAAAA,IAAAA,SAAU,OAAO,AAAD,KAAK,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;QAGzB,MAAM,IAAIR,MAAM;IAClB,EAAE,OAAOW,OAAO;QACd,MAAM,IAAIX,MAAM,CAAC,8BAA8B,EAAEW,OAAO;IAC1D;AACF;AAKO,MAAMC,yBAAyB,OACpC9C,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,mBAAmB;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,IAAIU,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAM6B,OAAOR,QAAQK,aAAa,IAAI,EAAER,aAAa;YAC1D,QAAQ;YACR,SAASlB,QAAQ,OAAO;QAC1B;QACK;YAEL,MAAMwB,WAAW,MAAMC,yBACrBJ,QACAK,aAAa,IAAI,EACjBR;YAGF,IAAIM,AAAAA,CAAAA,QAAAA,WAAAA,KAAAA,IAAAA,SAAU,OAAO,AAAD,KAAK,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;gBACL,SAASA,SAAS,OAAO;gBACzB,OAAOA,SAAS,KAAK;gBACrB,YAAY;YACd;YAGF,MAAM,IAAIR,MAAM;QAClB;IACF,EAAE,OAAOW,OAAO;QACd,MAAM,IAAIX,MAAM,CAAC,8BAA8B,EAAEW,OAAO;IAC1D;AACF"}
1	+ {"version":3,"file":"ai-model/prompt/yaml-generator.mjs","sources":["../../../../src/ai-model/prompt/yaml-generator.ts"],"sourcesContent":["import type {\n StreamingAIResponse,\n StreamingCodeGenerationOptions,\n} from '@/types';\nimport { YAML_EXAMPLE_CODE } from '@midscene/shared/constants';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n AIActionType,\n type ChatCompletionMessageParam,\n callAI,\n callAIWithStringResponse,\n} from '../index';\n\n// Common interfaces for test generation (shared between YAML and Playwright)\nexport interface EventCounts {\n navigation: number;\n click: number;\n input: number;\n scroll: number;\n total: number;\n}\n\nexport interface InputDescription {\n description: string;\n value: string;\n}\n\nexport interface ProcessedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n}\n\nexport interface EventSummary {\n testName: string;\n startUrl: string;\n eventCounts: EventCounts;\n urls: string[];\n clickDescriptions: string[];\n inputDescriptions: InputDescription[];\n events: ProcessedEvent[];\n}\n\n// Common ChromeRecordedEvent interface\nexport interface ChromeRecordedEvent {\n type: string;\n timestamp: number;\n url?: string;\n title?: string;\n elementDescription?: string;\n value?: string;\n pageInfo?: any;\n elementRect?: any;\n screenshotBefore?: string;\n screenshotAfter?: string;\n screenshotWithBox?: string;\n}\n\nexport interface YamlGenerationOptions {\n testName?: string;\n includeTimestamps?: boolean;\n maxScreenshots?: number;\n description?: string;\n}\n\nexport interface FilteredEvents {\n navigationEvents: ChromeRecordedEvent[];\n clickEvents: ChromeRecordedEvent[];\n inputEvents: ChromeRecordedEvent[];\n scrollEvents: ChromeRecordedEvent[];\n}\n\n// Common utility functions (shared between YAML and Playwright generators)\n\n/*\n Get screenshots from events for LLM context\n /\nexport const getScreenshotsForLLM = (\n events: ChromeRecordedEvent[],\n maxScreenshots = 1,\n): string[] => {\n // Find events with screenshots, prioritizing navigation and click events\n const eventsWithScreenshots = events.filter(\n (event) =>\n event.screenshotBefore \|\|\n event.screenshotAfter \|\|\n event.screenshotWithBox,\n );\n\n // Sort them by priority (navigation first, then clicks, then others)\n const sortedEvents = [...eventsWithScreenshots].sort((a, b) => {\n if (a.type === 'navigation' && b.type !== 'navigation') return -1;\n if (a.type !== 'navigation' && b.type === 'navigation') return 1;\n if (a.type === 'click' && b.type !== 'click') return -1;\n if (a.type !== 'click' && b.type === 'click') return 1;\n return 0;\n });\n\n // Extract up to maxScreenshots screenshots\n const screenshots: string[] = [];\n for (const event of sortedEvents) {\n // Prefer the most informative screenshot\n const screenshot =\n event.screenshotWithBox \|\|\n event.screenshotAfter \|\|\n event.screenshotBefore;\n if (screenshot && !screenshots.includes(screenshot)) {\n screenshots.push(screenshot);\n if (screenshots.length >= maxScreenshots) break;\n }\n }\n\n return screenshots;\n};\n\n/\n Filter events by type for easier processing\n /\nexport const filterEventsByType = (\n events: ChromeRecordedEvent[],\n): FilteredEvents => {\n return {\n navigationEvents: events.filter((event) => event.type === 'navigation'),\n clickEvents: events.filter((event) => event.type === 'click'),\n inputEvents: events.filter((event) => event.type === 'input'),\n scrollEvents: events.filter((event) => event.type === 'scroll'),\n };\n};\n\n/\n Create event counts summary\n /\nexport const createEventCounts = (\n filteredEvents: FilteredEvents,\n totalEvents: number,\n): EventCounts => {\n return {\n navigation: filteredEvents.navigationEvents.length,\n click: filteredEvents.clickEvents.length,\n input: filteredEvents.inputEvents.length,\n scroll: filteredEvents.scrollEvents.length,\n total: totalEvents,\n };\n};\n\n/\n Extract input descriptions from input events\n /\nexport const extractInputDescriptions = (\n inputEvents: ChromeRecordedEvent[],\n): InputDescription[] => {\n return inputEvents\n .map((event) => ({\n description: event.elementDescription \|\| '',\n value: event.value \|\| '',\n }))\n .filter((item) => item.description && item.value);\n};\n\n/\n Process events for LLM consumption\n /\nexport const processEventsForLLM = (\n events: ChromeRecordedEvent[],\n): ProcessedEvent[] => {\n return events.map((event) => ({\n type: event.type,\n timestamp: event.timestamp,\n url: event.url,\n title: event.title,\n elementDescription: event.elementDescription,\n value: event.value,\n pageInfo: event.pageInfo,\n elementRect: event.elementRect,\n }));\n};\n\n/\n Prepare comprehensive event summary for LLM\n /\nexport const prepareEventSummary = (\n events: ChromeRecordedEvent[],\n options: { testName?: string; maxScreenshots?: number } = {},\n): EventSummary => {\n const filteredEvents = filterEventsByType(events);\n const eventCounts = createEventCounts(filteredEvents, events.length);\n\n // Extract useful information from events\n const startUrl =\n filteredEvents.navigationEvents.length > 0\n ? filteredEvents.navigationEvents[0].url \|\| ''\n : '';\n\n const clickDescriptions = filteredEvents.clickEvents\n .map((event) => event.elementDescription)\n .filter((desc): desc is string => Boolean(desc))\n .slice(0, 10);\n\n const inputDescriptions = extractInputDescriptions(\n filteredEvents.inputEvents,\n ).slice(0, 10);\n\n const urls = filteredEvents.navigationEvents\n .map((e) => e.url)\n .filter((url): url is string => Boolean(url))\n .slice(0, 5);\n\n const processedEvents = processEventsForLLM(events);\n\n return {\n testName: options.testName \|\| 'Automated test from recorded events',\n startUrl,\n eventCounts,\n urls,\n clickDescriptions,\n inputDescriptions,\n events: processedEvents,\n };\n};\n\n/\n Create message content for LLM with optional screenshots\n /\nexport const createMessageContent = (\n promptText: string,\n screenshots: string[] = [],\n includeScreenshots = true,\n) => {\n const messageContent: any[] = [\n {\n type: 'text',\n text: promptText,\n },\n ];\n\n // Add screenshots if available and requested\n if (includeScreenshots && screenshots.length > 0) {\n messageContent.unshift({\n type: 'text',\n text: 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n screenshots.forEach((screenshot) => {\n messageContent.push({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n });\n });\n }\n\n return messageContent;\n};\n\n/\n Validate events before processing\n /\nexport const validateEvents = (events: ChromeRecordedEvent[]): void => {\n if (!events.length) {\n throw new Error('No events provided for test generation');\n }\n};\n\n// YAML-specific generation functions\n\n/\n Generates YAML test configuration from recorded events using AI\n /\nexport const generateYamlTest = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<string> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps \|\| false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots \|\| 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return response.content;\n }\n\n throw new Error('Failed to generate YAML test configuration');\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n\n/\n Generates YAML test configuration from recorded events using AI with streaming support\n */\nexport const generateYamlTestStream = async (\n events: ChromeRecordedEvent[],\n options: YamlGenerationOptions & StreamingCodeGenerationOptions,\n modelConfig: IModelConfig,\n): Promise<StreamingAIResponse> => {\n try {\n // Validate input\n validateEvents(events);\n\n // Prepare event summary using shared utilities\n const summary = prepareEventSummary(events, {\n testName: options.testName,\n maxScreenshots: options.maxScreenshots \|\| 3,\n });\n\n // Add YAML-specific options to summary\n const yamlSummary = {\n ...summary,\n includeTimestamps: options.includeTimestamps \|\| false,\n };\n\n // Get screenshots for visual context\n const screenshots = getScreenshotsForLLM(\n events,\n options.maxScreenshots \|\| 3,\n );\n\n // Use LLM to generate the YAML test configuration\n const prompt: ChatCompletionMessageParam[] = [\n {\n role: 'system',\n content: `You are an expert in Midscene.js YAML test generation. Generate clean, accurate YAML following these rules: ${YAML_EXAMPLE_CODE}`,\n },\n {\n role: 'user',\n content: `Generate YAML test for Midscene.js automation from recorded browser events.\n\nEvent Summary:\n${JSON.stringify(yamlSummary, null, 2)}\n\nConvert events:\n- navigation → target.url\n- click → aiTap with element description\n- input → aiInput with value and locate\n- scroll → aiScroll with appropriate direction\n- Add aiAssert for important state changes\n\nImportant: Return ONLY the raw YAML content. Do NOT wrap the response in markdown code blocks (no \\`\\`\\`yaml or \\`\\`\\`). Start directly with the YAML content.`,\n },\n ];\n\n // Add screenshots if available and requested\n if (screenshots.length > 0) {\n prompt.push({\n role: 'user',\n content:\n 'Here are screenshots from the recording session to help you understand the context:',\n });\n\n prompt.push({\n role: 'user',\n content: screenshots.map((screenshot) => ({\n type: 'image_url',\n image_url: {\n url: screenshot,\n },\n })),\n });\n }\n\n if (options.stream && options.onChunk) {\n // Use streaming\n return await callAI(prompt, AIActionType.TEXT, modelConfig, {\n stream: true,\n onChunk: options.onChunk,\n });\n } else {\n // Fallback to non-streaming\n const response = await callAIWithStringResponse(\n prompt,\n AIActionType.TEXT,\n modelConfig,\n );\n\n if (response?.content && typeof response.content === 'string') {\n return {\n content: response.content,\n usage: response.usage,\n isStreamed: false,\n };\n }\n\n throw new Error('Failed to generate YAML test configuration');\n }\n } catch (error) {\n throw new Error(`Failed to generate YAML test: ${error}`);\n }\n};\n"],"names":["getScreenshotsForLLM","events","maxScreenshots","eventsWithScreenshots","event","sortedEvents","a","b","screenshots","screenshot","filterEventsByType","createEventCounts","filteredEvents","totalEvents","extractInputDescriptions","inputEvents","item","processEventsForLLM","prepareEventSummary","options","eventCounts","startUrl","clickDescriptions","desc","Boolean","inputDescriptions","urls","e","url","processedEvents","createMessageContent","promptText","includeScreenshots","messageContent","validateEvents","Error","generateYamlTest","modelConfig","summary","yamlSummary","prompt","YAML_EXAMPLE_CODE","JSON","response","callAIWithStringResponse","AIActionType","error","generateYamlTestStream","callAI"],"mappings":";;AAkFO,MAAMA,uBAAuB,CAClCC,QACAC,iBAAiB,CAAC;IAGlB,MAAMC,wBAAwBF,OAAO,MAAM,CACzC,CAACG,QACCA,MAAM,gBAAgB,IACtBA,MAAM,eAAe,IACrBA,MAAM,iBAAiB;IAI3B,MAAMC,eAAe;WAAIF;KAAsB,CAAC,IAAI,CAAC,CAACG,GAAGC;QACvD,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,iBAAXA,EAAE,IAAI,IAAqBC,AAAW,iBAAXA,EAAE,IAAI,EAAmB,OAAO;QAC/D,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,IAAID,AAAW,YAAXA,EAAE,IAAI,IAAgBC,AAAW,YAAXA,EAAE,IAAI,EAAc,OAAO;QACrD,OAAO;IACT;IAGA,MAAMC,cAAwB,EAAE;IAChC,KAAK,MAAMJ,SAASC,aAAc;QAEhC,MAAMI,aACJL,MAAM,iBAAiB,IACvBA,MAAM,eAAe,IACrBA,MAAM,gBAAgB;QACxB,IAAIK,cAAc,CAACD,YAAY,QAAQ,CAACC,aAAa;YACnDD,YAAY,IAAI,CAACC;YACjB,IAAID,YAAY,MAAM,IAAIN,gBAAgB;QAC5C;IACF;IAEA,OAAOM;AACT;AAKO,MAAME,qBAAqB,CAChCT,SAEO;QACL,kBAAkBA,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,iBAAfA,MAAM,IAAI;QACrD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,aAAaH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,YAAfA,MAAM,IAAI;QAChD,cAAcH,OAAO,MAAM,CAAC,CAACG,QAAUA,AAAe,aAAfA,MAAM,IAAI;IACnD;AAMK,MAAMO,oBAAoB,CAC/BC,gBACAC,cAEO;QACL,YAAYD,eAAe,gBAAgB,CAAC,MAAM;QAClD,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,OAAOA,eAAe,WAAW,CAAC,MAAM;QACxC,QAAQA,eAAe,YAAY,CAAC,MAAM;QAC1C,OAAOC;IACT;AAMK,MAAMC,2BAA2B,CACtCC,cAEOA,YACJ,GAAG,CAAC,CAACX,QAAW;YACf,aAAaA,MAAM,kBAAkB,IAAI;YACzC,OAAOA,MAAM,KAAK,IAAI;QACxB,IACC,MAAM,CAAC,CAACY,OAASA,KAAK,WAAW,IAAIA,KAAK,KAAK;AAM7C,MAAMC,sBAAsB,CACjChB,SAEOA,OAAO,GAAG,CAAC,CAACG,QAAW;YAC5B,MAAMA,MAAM,IAAI;YAChB,WAAWA,MAAM,SAAS;YAC1B,KAAKA,MAAM,GAAG;YACd,OAAOA,MAAM,KAAK;YAClB,oBAAoBA,MAAM,kBAAkB;YAC5C,OAAOA,MAAM,KAAK;YAClB,UAAUA,MAAM,QAAQ;YACxB,aAAaA,MAAM,WAAW;QAChC;AAMK,MAAMc,sBAAsB,CACjCjB,QACAkB,UAA0D,CAAC,CAAC;IAE5D,MAAMP,iBAAiBF,mBAAmBT;IAC1C,MAAMmB,cAAcT,kBAAkBC,gBAAgBX,OAAO,MAAM;IAGnE,MAAMoB,WACJT,eAAe,gBAAgB,CAAC,MAAM,GAAG,IACrCA,eAAe,gBAAgB,CAAC,EAAE,CAAC,GAAG,IAAI,KAC1C;IAEN,MAAMU,oBAAoBV,eAAe,WAAW,CACjD,GAAG,CAAC,CAACR,QAAUA,MAAM,kBAAkB,EACvC,MAAM,CAAC,CAACmB,OAAyBC,QAAQD,OACzC,KAAK,CAAC,GAAG;IAEZ,MAAME,oBAAoBX,yBACxBF,eAAe,WAAW,EAC1B,KAAK,CAAC,GAAG;IAEX,MAAMc,OAAOd,eAAe,gBAAgB,CACzC,GAAG,CAAC,CAACe,IAAMA,EAAE,GAAG,EAChB,MAAM,CAAC,CAACC,MAAuBJ,QAAQI,MACvC,KAAK,CAAC,GAAG;IAEZ,MAAMC,kBAAkBZ,oBAAoBhB;IAE5C,OAAO;QACL,UAAUkB,QAAQ,QAAQ,IAAI;QAC9BE;QACAD;QACAM;QACAJ;QACAG;QACA,QAAQI;IACV;AACF;AAKO,MAAMC,uBAAuB,CAClCC,YACAvB,cAAwB,EAAE,EAC1BwB,qBAAqB,IAAI;IAEzB,MAAMC,iBAAwB;QAC5B;YACE,MAAM;YACN,MAAMF;QACR;KACD;IAGD,IAAIC,sBAAsBxB,YAAY,MAAM,GAAG,GAAG;QAChDyB,eAAe,OAAO,CAAC;YACrB,MAAM;YACN,MAAM;QACR;QAEAzB,YAAY,OAAO,CAAC,CAACC;YACnBwB,eAAe,IAAI,CAAC;gBAClB,MAAM;gBACN,WAAW;oBACT,KAAKxB;gBACP;YACF;QACF;IACF;IAEA,OAAOwB;AACT;AAKO,MAAMC,iBAAiB,CAACjC;IAC7B,IAAI,CAACA,OAAO,MAAM,EAChB,MAAM,IAAIkC,MAAM;AAEpB;AAOO,MAAMC,mBAAmB,OAC9BnC,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,mBAAmB;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,MAAMkC,WAAW,MAAMC,yBACrBJ,QACAK,aAAa,IAAI,EACjBR;QAGF,IAAIM,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAOA,SAAS,OAAO;QAGzB,MAAM,IAAIR,MAAM;IAClB,EAAE,OAAOW,OAAO;QACd,MAAM,IAAIX,MAAM,CAAC,8BAA8B,EAAEW,OAAO;IAC1D;AACF;AAKO,MAAMC,yBAAyB,OACpC9C,QACAkB,SACAkB;IAEA,IAAI;QAEFH,eAAejC;QAGf,MAAMqC,UAAUpB,oBAAoBjB,QAAQ;YAC1C,UAAUkB,QAAQ,QAAQ;YAC1B,gBAAgBA,QAAQ,cAAc,IAAI;QAC5C;QAGA,MAAMoB,cAAc;YAClB,GAAGD,OAAO;YACV,mBAAmBnB,QAAQ,iBAAiB,IAAI;QAClD;QAGA,MAAMX,cAAcR,qBAClBC,QACAkB,QAAQ,cAAc,IAAI;QAI5B,MAAMqB,SAAuC;YAC3C;gBACE,MAAM;gBACN,SAAS,CAAC,4GAA4G,EAAEC,mBAAmB;YAC7I;YACA;gBACE,MAAM;gBACN,SAAS,CAAC;;;AAGlB,EAAEC,KAAK,SAAS,CAACH,aAAa,MAAM,GAAG;;;;;;;;;8JASuH,CAAC;YACzJ;SACD;QAGD,IAAI/B,YAAY,MAAM,GAAG,GAAG;YAC1BgC,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SACE;YACJ;YAEAA,OAAO,IAAI,CAAC;gBACV,MAAM;gBACN,SAAShC,YAAY,GAAG,CAAC,CAACC,aAAgB;wBACxC,MAAM;wBACN,WAAW;4BACT,KAAKA;wBACP;oBACF;YACF;QACF;QAEA,IAAIU,QAAQ,MAAM,IAAIA,QAAQ,OAAO,EAEnC,OAAO,MAAM6B,OAAOR,QAAQK,aAAa,IAAI,EAAER,aAAa;YAC1D,QAAQ;YACR,SAASlB,QAAQ,OAAO;QAC1B;QACK;YAEL,MAAMwB,WAAW,MAAMC,yBACrBJ,QACAK,aAAa,IAAI,EACjBR;YAGF,IAAIM,UAAU,WAAW,AAA4B,YAA5B,OAAOA,SAAS,OAAO,EAC9C,OAAO;gBACL,SAASA,SAAS,OAAO;gBACzB,OAAOA,SAAS,KAAK;gBACrB,YAAY;YACd;YAGF,MAAM,IAAIR,MAAM;QAClB;IACF,EAAE,OAAOW,OAAO;QACd,MAAM,IAAIX,MAAM,CAAC,8BAA8B,EAAEW,OAAO;IAC1D;AACF"}