@midscene/core 1.8.11 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304) hide show
  1. package/dist/es/agent/agent.mjs +40 -50
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +39 -19
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +24 -22
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +11 -14
  8. package/dist/es/agent/utils.mjs.map +1 -1
  9. package/dist/es/ai-model/connectivity.mjs +7 -3
  10. package/dist/es/ai-model/connectivity.mjs.map +1 -1
  11. package/dist/es/ai-model/errors.mjs +9 -0
  12. package/dist/es/ai-model/errors.mjs.map +1 -0
  13. package/dist/es/ai-model/index.mjs +3 -4
  14. package/dist/es/ai-model/inspect.mjs +132 -144
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +46 -28
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/{auto-glm → models/auto-glm}/actions.mjs +22 -44
  19. package/dist/es/ai-model/models/auto-glm/actions.mjs.map +1 -0
  20. package/dist/es/ai-model/models/auto-glm/adapter.mjs +45 -0
  21. package/dist/es/ai-model/models/auto-glm/adapter.mjs.map +1 -0
  22. package/dist/es/ai-model/models/auto-glm/locate.mjs +112 -0
  23. package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -0
  24. package/dist/es/ai-model/models/auto-glm/parser.mjs.map +1 -0
  25. package/dist/es/ai-model/{auto-glm → models/auto-glm}/planning.mjs +6 -7
  26. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -0
  27. package/dist/es/ai-model/{auto-glm → models/auto-glm}/prompt.mjs +3 -11
  28. package/dist/es/ai-model/models/auto-glm/prompt.mjs.map +1 -0
  29. package/dist/es/ai-model/models/default.mjs +12 -0
  30. package/dist/es/ai-model/models/default.mjs.map +1 -0
  31. package/dist/es/ai-model/models/doubao.mjs +138 -0
  32. package/dist/es/ai-model/models/doubao.mjs.map +1 -0
  33. package/dist/es/ai-model/models/gemini.mjs +34 -0
  34. package/dist/es/ai-model/models/gemini.mjs.map +1 -0
  35. package/dist/es/ai-model/models/glm.mjs +37 -0
  36. package/dist/es/ai-model/models/glm.mjs.map +1 -0
  37. package/dist/es/ai-model/models/gpt.mjs +31 -0
  38. package/dist/es/ai-model/models/gpt.mjs.map +1 -0
  39. package/dist/es/ai-model/models/index.mjs +2 -0
  40. package/dist/es/ai-model/models/qwen.mjs +113 -0
  41. package/dist/es/ai-model/models/qwen.mjs.map +1 -0
  42. package/dist/es/ai-model/models/registry.mjs +45 -0
  43. package/dist/es/ai-model/models/registry.mjs.map +1 -0
  44. package/dist/es/ai-model/models/resolved.mjs +104 -0
  45. package/dist/es/ai-model/models/resolved.mjs.map +1 -0
  46. package/dist/es/ai-model/models/types.mjs +0 -0
  47. package/dist/es/ai-model/models/ui-tars/adapter.mjs +142 -0
  48. package/dist/es/ai-model/models/ui-tars/adapter.mjs.map +1 -0
  49. package/dist/es/ai-model/{ui-tars-planning.mjs → models/ui-tars/planning.mjs} +44 -62
  50. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/extraction.mjs +3 -3
  52. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  53. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -11
  54. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  55. package/dist/es/ai-model/prompt/llm-planning.mjs +25 -60
  56. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  57. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -10
  58. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  59. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs +9 -0
  60. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs.map +1 -0
  61. package/dist/es/ai-model/prompt/locate-param-example.mjs +15 -0
  62. package/dist/es/ai-model/prompt/locate-param-example.mjs.map +1 -0
  63. package/dist/es/ai-model/prompt/playwright-generator.mjs +5 -5
  64. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  65. package/dist/es/ai-model/prompt/yaml-generator.mjs +5 -5
  66. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  67. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs +107 -0
  68. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs.map +1 -0
  69. package/dist/es/ai-model/service-caller/index.mjs +59 -190
  70. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  71. package/dist/es/ai-model/service-caller/json.mjs +60 -0
  72. package/dist/es/ai-model/service-caller/json.mjs.map +1 -0
  73. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs +68 -0
  74. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs.map +1 -0
  75. package/dist/es/ai-model/shared/model-locate-result/factory.mjs +96 -0
  76. package/dist/es/ai-model/shared/model-locate-result/factory.mjs.map +1 -0
  77. package/dist/es/ai-model/shared/model-locate-result/index.mjs +3 -0
  78. package/dist/es/ai-model/shared/model-locate-result/parse.mjs +41 -0
  79. package/dist/es/ai-model/shared/model-locate-result/parse.mjs.map +1 -0
  80. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs +64 -0
  81. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs.map +1 -0
  82. package/dist/es/ai-model/shared/model-locate-result/types.mjs +0 -0
  83. package/dist/es/ai-model/types.mjs +0 -0
  84. package/dist/es/ai-model/workflows/image-preprocess.mjs +27 -0
  85. package/dist/es/ai-model/workflows/image-preprocess.mjs.map +1 -0
  86. package/dist/es/ai-model/workflows/inspect/index.mjs +2 -0
  87. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs +23 -0
  88. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs.map +1 -0
  89. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs +18 -0
  90. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs.map +1 -0
  91. package/dist/es/ai-model/workflows/inspect/types.mjs +0 -0
  92. package/dist/es/ai-model/workflows/planning/index.mjs +5 -0
  93. package/dist/es/ai-model/workflows/planning/index.mjs.map +1 -0
  94. package/dist/es/ai-model/workflows/planning/types.mjs +0 -0
  95. package/dist/es/common.mjs +2 -174
  96. package/dist/es/common.mjs.map +1 -1
  97. package/dist/es/device/index.mjs.map +1 -1
  98. package/dist/es/service/index.mjs +96 -69
  99. package/dist/es/service/index.mjs.map +1 -1
  100. package/dist/es/types.mjs.map +1 -1
  101. package/dist/es/utils.mjs +2 -2
  102. package/dist/es/yaml/player.mjs +4 -3
  103. package/dist/es/yaml/player.mjs.map +1 -1
  104. package/dist/lib/agent/agent.js +43 -53
  105. package/dist/lib/agent/agent.js.map +1 -1
  106. package/dist/lib/agent/task-builder.js +38 -18
  107. package/dist/lib/agent/task-builder.js.map +1 -1
  108. package/dist/lib/agent/tasks.js +23 -21
  109. package/dist/lib/agent/tasks.js.map +1 -1
  110. package/dist/lib/agent/utils.js +17 -17
  111. package/dist/lib/agent/utils.js.map +1 -1
  112. package/dist/lib/ai-model/connectivity.js +7 -3
  113. package/dist/lib/ai-model/connectivity.js.map +1 -1
  114. package/dist/lib/ai-model/errors.js +46 -0
  115. package/dist/lib/ai-model/errors.js.map +1 -0
  116. package/dist/lib/ai-model/index.js +7 -14
  117. package/dist/lib/ai-model/inspect.js +141 -144
  118. package/dist/lib/ai-model/inspect.js.map +1 -1
  119. package/dist/lib/ai-model/llm-planning.js +44 -26
  120. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  121. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/actions.js +22 -44
  122. package/dist/lib/ai-model/models/auto-glm/actions.js.map +1 -0
  123. package/dist/lib/ai-model/models/auto-glm/adapter.js +79 -0
  124. package/dist/lib/ai-model/models/auto-glm/adapter.js.map +1 -0
  125. package/dist/lib/ai-model/models/auto-glm/locate.js +146 -0
  126. package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -0
  127. package/dist/lib/ai-model/models/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/planning.js +8 -9
  129. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/prompt.js +14 -16
  131. package/dist/lib/ai-model/models/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/{auto-glm/util.js → models/default.js} +13 -13
  133. package/dist/lib/ai-model/models/default.js.map +1 -0
  134. package/dist/lib/ai-model/models/doubao.js +184 -0
  135. package/dist/lib/ai-model/models/doubao.js.map +1 -0
  136. package/dist/lib/ai-model/models/gemini.js +68 -0
  137. package/dist/lib/ai-model/models/gemini.js.map +1 -0
  138. package/dist/lib/ai-model/models/glm.js +71 -0
  139. package/dist/lib/ai-model/models/glm.js.map +1 -0
  140. package/dist/lib/ai-model/models/gpt.js +65 -0
  141. package/dist/lib/ai-model/models/gpt.js.map +1 -0
  142. package/dist/lib/ai-model/{service-caller/image-detail.js → models/index.js} +8 -7
  143. package/dist/lib/ai-model/models/index.js.map +1 -0
  144. package/dist/lib/ai-model/models/qwen.js +147 -0
  145. package/dist/lib/ai-model/models/qwen.js.map +1 -0
  146. package/dist/lib/ai-model/models/registry.js +85 -0
  147. package/dist/lib/ai-model/models/registry.js.map +1 -0
  148. package/dist/lib/ai-model/models/resolved.js +138 -0
  149. package/dist/lib/ai-model/models/resolved.js.map +1 -0
  150. package/dist/lib/ai-model/models/types.js +20 -0
  151. package/dist/lib/ai-model/models/types.js.map +1 -0
  152. package/dist/lib/ai-model/models/ui-tars/adapter.js +176 -0
  153. package/dist/lib/ai-model/models/ui-tars/adapter.js.map +1 -0
  154. package/dist/lib/ai-model/{ui-tars-planning.js → models/ui-tars/planning.js} +44 -62
  155. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/extraction.js +3 -3
  157. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  158. package/dist/lib/ai-model/prompt/llm-locator.js +11 -11
  159. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  160. package/dist/lib/ai-model/prompt/llm-planning.js +25 -60
  161. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  162. package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -10
  163. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  164. package/dist/lib/ai-model/prompt/locate-grounding-rules.js +43 -0
  165. package/dist/lib/ai-model/prompt/locate-grounding-rules.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/locate-param-example.js +52 -0
  167. package/dist/lib/ai-model/prompt/locate-param-example.js.map +1 -0
  168. package/dist/lib/ai-model/prompt/playwright-generator.js +5 -5
  169. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  170. package/dist/lib/ai-model/prompt/yaml-generator.js +5 -5
  171. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  172. package/dist/lib/ai-model/prompts/locate-result-coordinates.js +150 -0
  173. package/dist/lib/ai-model/prompts/locate-result-coordinates.js.map +1 -0
  174. package/dist/lib/ai-model/service-caller/index.js +68 -199
  175. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  176. package/dist/lib/ai-model/service-caller/json.js +100 -0
  177. package/dist/lib/ai-model/service-caller/json.js.map +1 -0
  178. package/dist/lib/ai-model/shared/model-locate-result/bbox.js +117 -0
  179. package/dist/lib/ai-model/shared/model-locate-result/bbox.js.map +1 -0
  180. package/dist/lib/ai-model/shared/model-locate-result/factory.js +130 -0
  181. package/dist/lib/ai-model/shared/model-locate-result/factory.js.map +1 -0
  182. package/dist/lib/ai-model/{prompt/common.js → shared/model-locate-result/index.js} +9 -9
  183. package/dist/lib/ai-model/shared/model-locate-result/index.js.map +1 -0
  184. package/dist/lib/ai-model/shared/model-locate-result/parse.js +78 -0
  185. package/dist/lib/ai-model/shared/model-locate-result/parse.js.map +1 -0
  186. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js +98 -0
  187. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js.map +1 -0
  188. package/dist/lib/ai-model/shared/model-locate-result/types.js +20 -0
  189. package/dist/lib/ai-model/shared/model-locate-result/types.js.map +1 -0
  190. package/dist/lib/ai-model/types.js +20 -0
  191. package/dist/lib/ai-model/types.js.map +1 -0
  192. package/dist/lib/ai-model/workflows/image-preprocess.js +61 -0
  193. package/dist/lib/ai-model/workflows/image-preprocess.js.map +1 -0
  194. package/dist/lib/ai-model/workflows/inspect/index.js +50 -0
  195. package/dist/lib/ai-model/workflows/inspect/index.js.map +1 -0
  196. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js +60 -0
  197. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js.map +1 -0
  198. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js +52 -0
  199. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js.map +1 -0
  200. package/dist/lib/ai-model/workflows/inspect/types.js +20 -0
  201. package/dist/lib/ai-model/workflows/inspect/types.js.map +1 -0
  202. package/dist/lib/ai-model/{model-family.js → workflows/planning/index.js} +6 -7
  203. package/dist/lib/ai-model/workflows/planning/index.js.map +1 -0
  204. package/dist/lib/ai-model/workflows/planning/types.js +20 -0
  205. package/dist/lib/ai-model/workflows/planning/types.js.map +1 -0
  206. package/dist/lib/common.js +4 -206
  207. package/dist/lib/common.js.map +1 -1
  208. package/dist/lib/device/index.js.map +1 -1
  209. package/dist/lib/service/index.js +96 -69
  210. package/dist/lib/service/index.js.map +1 -1
  211. package/dist/lib/types.js.map +1 -1
  212. package/dist/lib/utils.js +2 -2
  213. package/dist/lib/yaml/player.js +4 -3
  214. package/dist/lib/yaml/player.js.map +1 -1
  215. package/dist/types/agent/agent.d.ts +14 -6
  216. package/dist/types/agent/task-builder.d.ts +2 -2
  217. package/dist/types/agent/tasks.d.ts +6 -6
  218. package/dist/types/agent/utils.d.ts +8 -5
  219. package/dist/types/ai-model/errors.d.ts +2 -0
  220. package/dist/types/ai-model/index.d.ts +2 -4
  221. package/dist/types/ai-model/inspect.d.ts +13 -33
  222. package/dist/types/ai-model/llm-planning.d.ts +6 -17
  223. package/dist/types/ai-model/{auto-glm → models/auto-glm}/actions.d.ts +2 -2
  224. package/dist/types/ai-model/models/auto-glm/adapter.d.ts +5 -0
  225. package/dist/types/ai-model/models/auto-glm/locate.d.ts +3 -0
  226. package/dist/types/ai-model/models/auto-glm/planning.d.ts +3 -0
  227. package/dist/types/ai-model/models/auto-glm/prompt.d.ts +4 -0
  228. package/dist/types/ai-model/models/default.d.ts +2 -0
  229. package/dist/types/ai-model/models/doubao.d.ts +10 -0
  230. package/dist/types/ai-model/models/gemini.d.ts +18 -0
  231. package/dist/types/ai-model/models/glm.d.ts +18 -0
  232. package/dist/types/ai-model/models/gpt.d.ts +18 -0
  233. package/dist/types/ai-model/models/index.d.ts +2 -0
  234. package/dist/types/ai-model/models/qwen.d.ts +30 -0
  235. package/dist/types/ai-model/models/registry.d.ts +81 -0
  236. package/dist/types/ai-model/models/resolved.d.ts +9 -0
  237. package/dist/types/ai-model/models/types.d.ts +102 -0
  238. package/dist/types/ai-model/models/ui-tars/adapter.d.ts +6 -0
  239. package/dist/types/ai-model/{ui-tars-planning.d.ts → models/ui-tars/planning.d.ts} +7 -11
  240. package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
  241. package/dist/types/ai-model/prompt/llm-planning.d.ts +5 -5
  242. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
  243. package/dist/types/ai-model/prompt/locate-grounding-rules.d.ts +1 -0
  244. package/dist/types/ai-model/prompt/locate-param-example.d.ts +3 -0
  245. package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -3
  246. package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -3
  247. package/dist/types/ai-model/prompts/locate-result-coordinates.d.ts +6 -0
  248. package/dist/types/ai-model/service-caller/index.d.ts +19 -27
  249. package/dist/types/ai-model/service-caller/json.d.ts +9 -0
  250. package/dist/types/ai-model/shared/model-locate-result/bbox.d.ts +7 -0
  251. package/dist/types/ai-model/shared/model-locate-result/factory.d.ts +2 -0
  252. package/dist/types/ai-model/shared/model-locate-result/index.d.ts +3 -0
  253. package/dist/types/ai-model/shared/model-locate-result/parse.d.ts +5 -0
  254. package/dist/types/ai-model/shared/model-locate-result/pixel-bbox-mapper.d.ts +7 -0
  255. package/dist/types/ai-model/shared/model-locate-result/types.d.ts +157 -0
  256. package/dist/types/ai-model/types.d.ts +2 -0
  257. package/dist/types/ai-model/workflows/image-preprocess.d.ts +30 -0
  258. package/dist/types/ai-model/workflows/inspect/index.d.ts +1 -0
  259. package/dist/types/ai-model/workflows/inspect/locate-result-rect.d.ts +4 -0
  260. package/dist/types/ai-model/workflows/inspect/search-area-mapping.d.ts +3 -0
  261. package/dist/types/ai-model/workflows/inspect/types.d.ts +37 -0
  262. package/dist/types/ai-model/workflows/planning/index.d.ts +2 -0
  263. package/dist/types/ai-model/workflows/planning/types.d.ts +15 -0
  264. package/dist/types/common.d.ts +0 -30
  265. package/dist/types/device/index.d.ts +22 -22
  266. package/dist/types/service/index.d.ts +5 -4
  267. package/dist/types/types.d.ts +21 -9
  268. package/dist/types/yaml.d.ts +8 -2
  269. package/package.json +2 -2
  270. package/dist/es/ai-model/auto-glm/actions.mjs.map +0 -1
  271. package/dist/es/ai-model/auto-glm/index.mjs +0 -6
  272. package/dist/es/ai-model/auto-glm/parser.mjs.map +0 -1
  273. package/dist/es/ai-model/auto-glm/planning.mjs.map +0 -1
  274. package/dist/es/ai-model/auto-glm/prompt.mjs.map +0 -1
  275. package/dist/es/ai-model/auto-glm/util.mjs +0 -9
  276. package/dist/es/ai-model/auto-glm/util.mjs.map +0 -1
  277. package/dist/es/ai-model/model-family.mjs +0 -6
  278. package/dist/es/ai-model/model-family.mjs.map +0 -1
  279. package/dist/es/ai-model/prompt/common.mjs +0 -8
  280. package/dist/es/ai-model/prompt/common.mjs.map +0 -1
  281. package/dist/es/ai-model/service-caller/image-detail.mjs +0 -6
  282. package/dist/es/ai-model/service-caller/image-detail.mjs.map +0 -1
  283. package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
  284. package/dist/lib/ai-model/auto-glm/actions.js.map +0 -1
  285. package/dist/lib/ai-model/auto-glm/index.js +0 -66
  286. package/dist/lib/ai-model/auto-glm/index.js.map +0 -1
  287. package/dist/lib/ai-model/auto-glm/parser.js.map +0 -1
  288. package/dist/lib/ai-model/auto-glm/planning.js.map +0 -1
  289. package/dist/lib/ai-model/auto-glm/prompt.js.map +0 -1
  290. package/dist/lib/ai-model/auto-glm/util.js.map +0 -1
  291. package/dist/lib/ai-model/model-family.js.map +0 -1
  292. package/dist/lib/ai-model/prompt/common.js.map +0 -1
  293. package/dist/lib/ai-model/service-caller/image-detail.js.map +0 -1
  294. package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
  295. package/dist/types/ai-model/auto-glm/index.d.ts +0 -6
  296. package/dist/types/ai-model/auto-glm/planning.d.ts +0 -12
  297. package/dist/types/ai-model/auto-glm/prompt.d.ts +0 -27
  298. package/dist/types/ai-model/auto-glm/util.d.ts +0 -13
  299. package/dist/types/ai-model/model-family.d.ts +0 -7
  300. package/dist/types/ai-model/prompt/common.d.ts +0 -2
  301. package/dist/types/ai-model/service-caller/image-detail.d.ts +0 -2
  302. /package/dist/es/ai-model/{auto-glm → models/auto-glm}/parser.mjs +0 -0
  303. /package/dist/lib/ai-model/{auto-glm → models/auto-glm}/parser.js +0 -0
  304. /package/dist/types/ai-model/{auto-glm → models/auto-glm}/parser.d.ts +0 -0
@@ -30,9 +30,11 @@ __webpack_require__.d(__webpack_exports__, {
30
30
  const external_common_js_namespaceObject = require("../../common.js");
31
31
  const env_namespaceObject = require("@midscene/shared/env");
32
32
  const zod_schema_utils_namespaceObject = require("@midscene/shared/zod-schema-utils");
33
- const external_common_js_namespaceObject_1 = require("./common.js");
34
- const vlLocateParam = (modelFamily)=>{
35
- if (modelFamily) return `{bbox: [number, number, number, number], prompt: string } // ${(0, external_common_js_namespaceObject_1.bboxDescription)(modelFamily)}`;
33
+ const external_errors_js_namespaceObject = require("../errors.js");
34
+ const external_locate_grounding_rules_js_namespaceObject = require("./locate-grounding-rules.js");
35
+ const external_locate_param_example_js_namespaceObject = require("./locate-param-example.js");
36
+ const locateParamSchemaDescription = (promptSpec)=>{
37
+ if (promptSpec) return `{${promptSpec.resultKey}: ${promptSpec.resultValueSchema}, prompt: string } // ${promptSpec.resultValueDescription}`;
36
38
  return "{ prompt: string /* description of the target element */ }";
37
39
  };
38
40
  const OBSERVE_STEP_NOTES = "### Observation Guidelines\n\n- Treat visible summaries, thumbnails, cropped content, and partially visible lists as potentially incomplete when the task depends on precise details.\n- If the current view does not provide enough information to decide safely, use available UI affordances such as opening details, expanding content, previewing, enlarging, zooming, or scrolling before acting.";
@@ -50,48 +52,23 @@ const findDefaultValue = (field)=>{
50
52
  else break;
51
53
  }
52
54
  };
53
- const SAMPLE_BBOXES = [
54
- [
55
- 50,
56
- 100,
57
- 200,
58
- 200
59
- ],
60
- [
61
- 300,
62
- 400,
63
- 500,
64
- 500
65
- ],
66
- [
67
- 600,
68
- 100,
69
- 800,
70
- 250
71
- ],
72
- [
73
- 50,
74
- 600,
75
- 250,
76
- 750
77
- ]
78
- ];
79
- const injectBboxIntoSample = (sample, locateFields, includeBbox)=>{
80
- if (!includeBbox) return sample;
55
+ const injectLocateResultIntoSample = (sample, locateFields, promptSpec)=>{
56
+ const resultKey = promptSpec.resultKey;
57
+ const sampleResults = promptSpec.exampleValues;
81
58
  const result = {
82
59
  ...sample
83
60
  };
84
- let bboxIndex = 0;
61
+ let sampleResultIndex = 0;
85
62
  for (const field of locateFields)if (result[field] && 'object' == typeof result[field] && result[field].prompt) {
86
63
  result[field] = {
87
64
  ...result[field],
88
- bbox: SAMPLE_BBOXES[bboxIndex % SAMPLE_BBOXES.length]
65
+ [resultKey]: sampleResults[sampleResultIndex % sampleResults.length]
89
66
  };
90
- bboxIndex++;
67
+ sampleResultIndex++;
91
68
  }
92
69
  return result;
93
70
  };
94
- const descriptionForAction = (action, locatorSchemaTypeDescription, includeBbox = false)=>{
71
+ const descriptionForAction = (action, locateParamTypeDescription, includeLocateInPlanning = false, locatePromptSpec)=>{
95
72
  const tab = ' ';
96
73
  const fields = [];
97
74
  fields.push(`- type: "${action.name}"`);
@@ -104,7 +81,7 @@ const descriptionForAction = (action, locatorSchemaTypeDescription, includeBbox
104
81
  for (const [key, field] of Object.entries(shape))if (field && 'object' == typeof field) {
105
82
  const isOptional = 'function' == typeof field.isOptional && field.isOptional();
106
83
  const keyWithOptional = isOptional ? `${key}?` : key;
107
- const typeName = (0, zod_schema_utils_namespaceObject.getZodTypeName)(field, locatorSchemaTypeDescription);
84
+ const typeName = (0, zod_schema_utils_namespaceObject.getZodTypeName)(field, locateParamTypeDescription);
108
85
  const description = (0, zod_schema_utils_namespaceObject.getZodDescription)(field);
109
86
  const defaultValue = findDefaultValue(field);
110
87
  const hasDefault = void 0 !== defaultValue;
@@ -135,38 +112,24 @@ const descriptionForAction = (action, locatorSchemaTypeDescription, includeBbox
135
112
  }
136
113
  if (action.sample && 'object' == typeof action.sample) {
137
114
  const locateFields = (0, external_common_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema);
138
- const sampleWithBbox = injectBboxIntoSample(action.sample, locateFields, includeBbox);
139
- const sampleStr = `- sample:\n${tab}${tab}<action-type>${action.name}</action-type>\n${tab}${tab}<action-param-json>\n${tab}${tab}${JSON.stringify(sampleWithBbox, null, 2).replace(/\n/g, `\n${tab}${tab}`)}\n${tab}${tab}</action-param-json>`;
115
+ const sampleWithLocateResult = includeLocateInPlanning && locatePromptSpec ? injectLocateResultIntoSample(action.sample, locateFields, locatePromptSpec) : action.sample;
116
+ const sampleStr = `- sample:\n${tab}${tab}<action-type>${action.name}</action-type>\n${tab}${tab}<action-param-json>\n${tab}${tab}${JSON.stringify(sampleWithLocateResult, null, 2).replace(/\n/g, `\n${tab}${tab}`)}\n${tab}${tab}</action-param-json>`;
140
117
  fields.push(sampleStr);
141
118
  }
142
119
  return `- ${action.name}, ${action.description || "No description provided"}
143
120
  ${tab}${fields.join(`\n${tab}`)}
144
121
  `.trim();
145
122
  };
146
- async function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbox, includeThought, includeSubGoals }) {
123
+ async function systemPromptToTaskPlanning({ actionSpace, locatePromptSpec, includeLocateInPlanning, includeThought, includeSubGoals }) {
147
124
  const preferredLanguage = (0, env_namespaceObject.getPreferredLanguage)();
148
- if (includeBbox && !modelFamily) throw new Error('modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.');
149
- const actionDescriptionList = actionSpace.map((action)=>descriptionForAction(action, vlLocateParam(includeBbox ? modelFamily : void 0), includeBbox));
125
+ if (includeLocateInPlanning && !locatePromptSpec) throw new Error((0, external_errors_js_namespaceObject.planningModelFamilyRequiredForLocateMessage)());
126
+ const actionDescriptionList = actionSpace.map((action)=>descriptionForAction(action, locateParamSchemaDescription(includeLocateInPlanning ? locatePromptSpec : void 0), includeLocateInPlanning, locatePromptSpec));
150
127
  const actionList = actionDescriptionList.join('\n');
151
128
  const shouldIncludeSubGoals = includeSubGoals ?? false;
152
- const locateExample1 = includeBbox ? `{
153
- "prompt": "Add to cart button for Sauce Labs Backpack",
154
- "bbox": [345, 442, 458, 483]
155
- }` : `{
156
- "prompt": "Add to cart button for Sauce Labs Backpack"
157
- }`;
158
- const locateNameField = includeBbox ? `{
159
- "prompt": "Name input field in the registration form",
160
- "bbox": [120, 180, 380, 210]
161
- }` : `{
162
- "prompt": "Name input field in the registration form"
163
- }`;
164
- const locateEmailField = includeBbox ? `{
165
- "prompt": "Email input field in the registration form",
166
- "bbox": [120, 240, 380, 270]
167
- }` : `{
168
- "prompt": "Email input field in the registration form"
169
- }`;
129
+ const locateExample = (prompt, exampleValueIndex)=>(0, external_locate_param_example_js_namespaceObject.locateParamExample)(prompt, includeLocateInPlanning ? locatePromptSpec : void 0, locatePromptSpec?.exampleValues[exampleValueIndex] ?? locatePromptSpec?.exampleValues[0]);
130
+ const locateExample1 = locateExample('Add to cart button for Sauce Labs Backpack', 1);
131
+ const locateNameField = locateExample('Name input field in the registration form', 2);
132
+ const locateEmailField = locateExample('Email input field in the registration form', 3);
170
133
  const step1Title = shouldIncludeSubGoals ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)' : '## Step 1: Observe (related tags: <thought>)';
171
134
  const step1Description = shouldIncludeSubGoals ? "First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot." : 'First, observe the current screenshot and previous logs to understand the current state.';
172
135
  const explicitInstructionRule = 'CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: "fill out the form" means only fill fields, do NOT submit; "click the button" means only click, do NOT wait for page load or verify results; "type \'hello\'" means only type, do NOT press Enter.';
@@ -335,7 +298,9 @@ ONLY if the task is not complete: Think what the next action is according to the
335
298
 
336
299
  ${ACTION_STEP_NOTES}
337
300
 
338
- ### Supporting actions list
301
+ ${includeLocateInPlanning ? `${(0, external_locate_grounding_rules_js_namespaceObject.locateGroundingRules)()}
302
+
303
+ ` : ''}### Supporting actions list
339
304
 
340
305
  ${actionList}
341
306
 
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { findAllMidsceneLocatorField } from '@/common';\nimport type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nconst OBSERVE_STEP_NOTES = [\n '### Observation Guidelines',\n '',\n '- Treat visible summaries, thumbnails, cropped content, and partially visible lists as potentially incomplete when the task depends on precise details.',\n '- If the current view does not provide enough information to decide safely, use available UI affordances such as opening details, expanding content, previewing, enlarging, zooming, or scrolling before acting.',\n].join('\\n');\n\nconst MEMORY_STEP_NOTES = [\n 'Use `<memory>` to record clear, task-relevant information from the current screenshot that may be needed in later steps. The current screenshot will not be available later, so memory should preserve enough detail for future reasoning, verification, or action.',\n '',\n '- Record information completely and exactly as shown. Do not summarize, translate, normalize, or merge values that may matter later.',\n '- When recording an item, include the item itself, its exact task-relevant details, and the visible cue or UI context that identifies where it came from when relevant.',\n '- Keep similar or repeated items as separate memory entries unless their task-relevant details are confirmed to be the same.',\n '- After navigation, scrolling, editing, deletion, saving, or other screen changes, treat remembered positions, order, indexes, and UI bindings as references only. Re-check the current screen before acting on them.',\n '',\n 'Examples:',\n '- If you need to find an item and later assert its details, record the item name and the exact details needed for the assertion, such as status, price, date, owner, description, or other visible fields.',\n '- If you need to compare multiple similar results, record each candidate separately with its exact distinguishing details and visible context.',\n '- If you need to copy information from one place to another, record the exact source value and the target field or UI cue it should be mapped to.',\n].join('\\n');\n\nconst ACTION_STEP_NOTES = [\n '### Action Guidelines',\n '',\n '- When editing existing text in a UI field, preserve all existing text by moving the cursor and typing/deleting the minimal necessary characters, and use Input with mode \"typeOnly\" when typing new characters for such edits.',\n].join('\\n');\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\n/**\n * Inject bbox into locate fields of a sample object.\n * Walks the sample and for any locate field (identified by paramSchema),\n * adds a fake bbox array when includeBbox is true.\n */\nconst SAMPLE_BBOXES: [number, number, number, number][] = [\n [50, 100, 200, 200],\n [300, 400, 500, 500],\n [600, 100, 800, 250],\n [50, 600, 250, 750],\n];\n\nconst injectBboxIntoSample = (\n sample: Record<string, any>,\n locateFields: string[],\n includeBbox: boolean,\n): Record<string, any> => {\n if (!includeBbox) return sample;\n const result = { ...sample };\n let bboxIndex = 0;\n for (const field of locateFields) {\n if (\n result[field] &&\n typeof result[field] === 'object' &&\n result[field].prompt\n ) {\n result[field] = {\n ...result[field],\n bbox: SAMPLE_BBOXES[bboxIndex % SAMPLE_BBOXES.length],\n };\n bboxIndex++;\n }\n }\n return result;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n includeBbox = false,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n // Render sample if provided, using the same XML tag format as the real output\n if (action.sample && typeof action.sample === 'object') {\n const locateFields = findAllMidsceneLocatorField(action.paramSchema);\n const sampleWithBbox = injectBboxIntoSample(\n action.sample,\n locateFields,\n includeBbox,\n );\n const sampleStr = `- sample:\\n${tab}${tab}<action-type>${action.name}</action-type>\\n${tab}${tab}<action-param-json>\\n${tab}${tab}${JSON.stringify(sampleWithBbox, null, 2).replace(/\\n/g, `\\n${tab}${tab}`)}\\n${tab}${tab}</action-param-json>`;\n fields.push(sampleStr);\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n includeSubGoals,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n includeSubGoals?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n includeBbox,\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const shouldIncludeThought = includeThought ?? true;\n const shouldIncludeSubGoals = includeSubGoals ?? false;\n\n // Generate locate object examples based on includeBbox\n const locateExample1 = includeBbox\n ? `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\",\n \"bbox\": [345, 442, 458, 483]\n }`\n : `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\"\n }`;\n\n // Locate examples for multi-turn conversation\n const locateNameField = includeBbox\n ? `{\n \"prompt\": \"Name input field in the registration form\",\n \"bbox\": [120, 180, 380, 210]\n }`\n : `{\n \"prompt\": \"Name input field in the registration form\"\n }`;\n\n const locateEmailField = includeBbox\n ? `{\n \"prompt\": \"Email input field in the registration form\",\n \"bbox\": [120, 240, 380, 270]\n }`\n : `{\n \"prompt\": \"Email input field in the registration form\"\n }`;\n\n const thoughtTag = (content: string) =>\n shouldIncludeThought ? `<thought>${content}</thought>\\n` : '';\n\n // Sub-goals related content - only included when shouldIncludeSubGoals is true\n const step1Title = shouldIncludeSubGoals\n ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)'\n : '## Step 1: Observe (related tags: <thought>)';\n\n const step1Description = shouldIncludeSubGoals\n ? \"First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot.\"\n : 'First, observe the current screenshot and previous logs to understand the current state.';\n\n const explicitInstructionRule = `CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: \"fill out the form\" means only fill fields, do NOT submit; \"click the button\" means only click, do NOT wait for page load or verify results; \"type 'hello'\" means only type, do NOT press Enter.`;\n\n const thoughtTagDescription = shouldIncludeSubGoals\n ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the user's requirement? What is the current state based on the screenshot? Are all sub-goals completed? If not, what should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`\n : `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the current state based on the screenshot? What should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`;\n\n const subGoalTags = shouldIncludeSubGoals\n ? `\n\n* <update-plan-content> tag\n\nUse this structure to give or update your plan:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished|pending\">sub goal description</sub-goal>\n <sub-goal index=\"2\" status=\"finished|pending\">sub goal description</sub-goal>\n ...\n</update-plan-content>\n\n* <mark-sub-goal-done> tag\n\nUse this structure to mark a sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nIMPORTANT: You MUST only mark a sub-goal as \"finished\" AFTER you have confirmed the task is actually completed by observing the result in the screenshot. Do NOT mark a sub-goal as done just because you expect the next action will complete it. Wait until you see visual confirmation in the screenshot that the sub-goal has been achieved.\n\n* Note\n\nDuring execution, you can call <update-plan-content> at any time to update the plan based on the latest screenshot and completed sub-goals.\n\n### Example\n\nIf the user wants to \"log in to a system using username and password, complete all to-do items, and submit a registration form\", you can break it down into the following sub-goals:\n\n<thought>...</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Log in to the system</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Complete all to-do items</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Submit the registration form</sub-goal>\n</update-plan-content>\n\nAfter logging in and seeing the to-do items, you can mark the sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nAt this point, the status of all sub-goals is:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished\" />\n <sub-goal index=\"2\" status=\"pending\" />\n <sub-goal index=\"3\" status=\"pending\" />\n</update-plan-content>\n\nAfter some time, when the last sub-goal is also completed, you can mark it as done as well:\n\n<mark-sub-goal-done>\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>`\n : '';\n\n // Step numbering adjusts based on whether sub-goals are included\n // When includeSubGoals=false, memory step is skipped\n const memoryStepNumber = 2; // Only used when shouldIncludeSubGoals is true\n const checkGoalStepNumber = shouldIncludeSubGoals ? 3 : 2;\n const actionStepNumber = shouldIncludeSubGoals ? 4 : 3;\n\n return `\nTarget: You are an expert to manipulate the UI to accomplish the user's instruction. User will give you an instruction, some screenshots, background knowledge and previous logs indicating what have been done. Your task is to accomplish the instruction by thinking through the path to complete the task and give the next action to execute.\n\n${step1Title}\n\n${step1Description}\n${shouldIncludeSubGoals ? `\\n${OBSERVE_STEP_NOTES}\\n` : ''}\n* <thought> tag (REQUIRED)\n\n${thoughtTagDescription}\n${subGoalTags}\n${\n shouldIncludeSubGoals\n ? `\n## Step ${memoryStepNumber}: Memory Data from Current Screenshot (related tags: <memory>)\n\n${MEMORY_STEP_NOTES}\n\nDon't use this tag if no information needs to be preserved.\n`\n : ''\n}\n## Step ${checkGoalStepNumber}: ${shouldIncludeSubGoals ? 'Check if Goal is Accomplished' : 'Check if the Instruction is Fulfilled'} (related tags: <complete>)\n\n${shouldIncludeSubGoals ? 'Based on the current screenshot and the status of all sub-goals, determine' : 'Determine'} if the entire task is completed.\n\n### CRITICAL: The User's Instruction is the Supreme Authority\n\nThe user's instruction defines the EXACT scope of what you must accomplish. You MUST follow it precisely - nothing more, nothing less. Violating this rule may cause severe consequences such as data loss, unintended operations, or system failures.\n\n**Explicit instructions vs. High-level goals:**\n- If the user gives you **explicit operation steps** (e.g., \"click X\", \"type Y\", \"fill out the form\"), treat them as exact commands. Execute ONLY those steps, nothing more.\n- If the user gives you a **high-level goal** (e.g., \"log in to the system\", \"complete the purchase\"), you may determine the necessary steps to achieve it.\n\n**What \"${shouldIncludeSubGoals ? 'goal accomplished' : 'instruction fulfilled'}\" means:**\n- The ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} when you have done EXACTLY what the user asked - no extra steps, no assumptions.\n- Do NOT perform any action beyond the explicit instruction, even if it seems logical or helpful.\n\n**Examples - Explicit instructions (execute exactly, no extra steps):**\n- \"fill out the form\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when all fields are filled. Do NOT submit the form.\n- \"click the login button\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} once clicked. Do NOT wait for page load or verify login success.\n- \"type 'hello' in the search box\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when 'hello' is typed. Do NOT press Enter or trigger search.\n- \"select the first item\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when selected. Do NOT proceed to checkout.\n\n**Special case - Scrollable option lists and dropdowns:**\n- When choosing an item from a scrollable select, dropdown, listbox, menu, or similar option list, first open the control if it is closed. Once the list is open, interact with the list itself, not the page.\n- If the target option is visible in the open list, Tap that exact option immediately.\n- If the list is open but the target option is not visible, try to find it by scrolling the open list/dropdown before giving up or interacting with other elements.\n- For an open dropdown/list, prefer small incremental Scroll actions with an explicit distance (typically 50-120 pixels) and a locate target describing the open list/dropdown. Do NOT omit distance while searching within a list, because the default scroll distance can skip over relevant options and cause oscillation.\n- While searching within an open dropdown/list, use short scrolls so intermediate options are not skipped.\n- After selecting the target, if the trigger text or result text shows the requested option, treat the current selection step as fulfilled and continue evaluating the remaining user instruction.\n\n**Special case - Text hidden by a narrow input field:**\n- CRITICAL PRIORITY OVERRIDE - Input verification after an input action:\n- This rule overrides the general requirement to verify the exact target text from the screenshot.\n- If the previous step already executed an input action, and the current input field is not empty, you MUST directly treat that input as successful.\n- In this situation, do NOT use the visible text in the screenshot to decide that the input is incomplete, incorrect, missing characters, out of order, or needs correction.\n- Do NOT perform ClearInput, another Input, KeyboardPress, or any other action intended to \"fix the input content\" just because the visible text looks different from the target text, has abnormal character order, missing characters, abnormal spacing, suspicious visual recognition results, or appears to have selected/highlighted text.\n- The general rule \"do EXACTLY what the user asked\" still applies to the intended input value you execute, but it MUST NOT be enforced by re-validating the visible text in the screenshot after the input action.\n- Differences in visible text must be assumed to be caused by clipping, horizontal scrolling, narrow input fields, text selection, caret position, or visual recognition errors rather than input failure.\n- Retry input only when the input field is clearly still empty, or when the page shows an explicit error message.\n\n**Special case - Assertion instructions:**\n- If the user's instruction includes an assertion (e.g., \"verify that...\", \"check that...\", \"assert...\"), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, mark ${shouldIncludeSubGoals ? 'the goal' : 'it'} as failed (success=\"false\").\n- If the page is still loading (e.g., you see a loading spinner, skeleton screen, or progress bar), do NOT assert yet. Wait for the page to finish loading before evaluating the assertion.\n\n### Completion Criteria for Process-required Instructions\n\nIf the user's instruction includes explicit operation steps, ordering requirements, or action requirements, it is a process-required instruction.\n\nFor process-required instructions, do NOT treat the task as complete only because the current screenshot already shows the final expected state. Do NOT infer that earlier steps were executed from the final UI state.\n\nYou may output <complete success=\"true\"> only when the current execution history, previous logs, or the screenshot after the most recent action proves that every explicit step required by the user has been completed, and the final check condition is also satisfied.\n\nIf any explicit step lacks completion evidence in the current execution history, continue with the next missing step instead of outputting <complete>, even if the current screenshot appears to satisfy the final condition.\n${\n !shouldIncludeSubGoals\n ? `\n**Page navigation restriction:**\n- Unless the user's instruction explicitly asks you to click a link, jump to another page, or navigate to a URL, you MUST complete the task on the current page only.\n- Do NOT navigate away from the current page on your own initiative (e.g., do not click links that lead to other pages, do not use browser back/forward, do not open new URLs).\n- If the task cannot be accomplished on the current page and the user has not instructed you to navigate, report it as a failure (success=\"false\") instead of attempting to navigate to other pages.\n`\n : ''\n}\n### Output Rules\n\n- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.\n- Use the <complete success=\"true|false\">message</complete> tag to output the result if the goal is accomplished or failed.\n - the 'success' attribute is required. ${shouldIncludeSubGoals ? 'It means whether the expected goal is accomplished based on what you observe in the current screenshot and the current execution history. ' : ''}No matter what errors occurred during execution, set success=\"true\" only when the current execution history shows that all steps required by the user have been completed and the final state satisfies the requirement. If the user asks for explicit operation steps or an ordered workflow, do not treat those steps as completed only because the current screenshot already shows the final expected state. If the ${shouldIncludeSubGoals ? 'expected goal is not accomplished and cannot be accomplished' : 'instruction is not fulfilled and cannot be fulfilled'}, set success=\"false\".\n - the 'message' is the information that will be provided to the user. If the user asks for a specific format, strictly follow that.\n- If you output <complete>, do NOT output <action-type> or <action-param-json>. The task ends here.\n\n## Step ${actionStepNumber}: Determine Next Action (related tags: <log>, <action-type>, <action-param-json>, <error>)\n\nONLY if the task is not complete: Think what the next action is according to the current screenshot${shouldIncludeSubGoals ? ' and the plan' : ''}.\n\n- Don't give extra actions or plans beyond the instruction or the plan. For example, don't try to submit the form if the instruction is only to fill something.\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully. Otherwise, retry or do something else to recover.\n- Give just the next ONE action you should do (if any)\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 3 times, you should think this is an error and set the \"error\" field to the error message.\n\n${ACTION_STEP_NOTES}\n\n### Supporting actions list\n\n${actionList}\n\n### Log to give user feedback (preamble message)\n\nThe <log> tag is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- <log>Click the login button</log>\n- <log>Scroll to find the 'Yes' button in popup</log>\n- <log>Previous actions failed to find the 'Yes' button, i will try again</log>\n- <log>Go back to find the login button</log>\n\n### If there is some action to do ...\n\n- Use the <action-type> and <action-param-json> tags to output the action to be executed.\n- The <action-type> MUST be one of the supporting actions. 'complete' is NOT a valid action-type.\n- Parameter names are strict. Use EXACTLY the field names listed for the selected action. Do NOT invent alias fields. If an action has a \"sample\" in its description, follow that structure.\nFor example:\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateExample1}\n}\n</action-param-json>\n\n### If you think there is an error ...\n\n- Use the <error> tag to output the error message.\n\nFor example:\n<error>Unable to find the required element on the page</error>\n\n### If there is no action to do ...\n\n- Don't output <action-type> or <action-param-json> if there is no action to do.\n\n## Return Format\n\nReturn in XML format following this decision flow:\n\n**Always include (REQUIRED):**\n<!-- Step 1: Observe${shouldIncludeSubGoals ? ' and Plan' : ''} -->\n<thought>Your thought process here. NEVER skip this tag.</thought>\n${\n shouldIncludeSubGoals\n ? `\n<!-- required when no update-plan-content is provided in the previous response -->\n<update-plan-content>...</update-plan-content>\n\n<!-- required when any sub-goal is completed -->\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n`\n : ''\n}${\n shouldIncludeSubGoals\n ? `\n<!-- Step ${memoryStepNumber}: Memory data from current screenshot if needed -->\n<memory>...</memory>\n`\n : ''\n}\n**Then choose ONE of the following paths:**\n\n**Path A: If the ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} or failed (Step ${checkGoalStepNumber})**\n<complete success=\"true|false\">...</complete>\n\n**Path B: If the ${shouldIncludeSubGoals ? 'goal is NOT complete' : 'instruction is NOT fulfilled'} yet (Step ${actionStepNumber})**\n<!-- Determine next action -->\n<log>...</log>\n<action-type>...</action-type>\n<action-param-json>...</action-param-json>\n\n<!-- OR if there's an error -->\n<error>...</error>\n${\n shouldIncludeSubGoals\n ? `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I'll break this down into sub-goals and start with the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Fill in the Name field with 'John'</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Fill in the Email field with 'john@example.com'</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Return the filled email address</sub-goal>\n</update-plan-content>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. Current sub-goal is running, will be completed after input.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n- Typing 'John' into the Name field\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Sub-goal 1 is complete. Next, I need to fill the Email field for sub-goal 2.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n<memory>Name field has been filled with 'John'</memory>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, sub-goal 2 will be complete and my task will be done.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Goal accomplished)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n- Typing email address into the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. Sub-goal 2 is complete. The user asked me to return the filled email address, so I need to include 'john@example.com' in my response. All sub-goals are now finished.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"2\" status=\"finished\" />\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>\n<complete success=\"true\">john@example.com</complete>\n`\n : `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I should start by clicking on the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. After this, I'll need to fill the Email field as well.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Next, I need to fill the Email field with 'john@example.com'. I'll click on the Email field first.</thought>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, both required fields will be filled and my task will be complete.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Instruction fulfilled)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. The user asked me to return the filled email address, so I should include 'john@example.com' in my response. The instruction has been fulfilled.</thought>\n<complete success=\"true\">john@example.com</complete>\n`\n}`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","vlLocateParam","modelFamily","bboxDescription","OBSERVE_STEP_NOTES","MEMORY_STEP_NOTES","ACTION_STEP_NOTES","findDefaultValue","field","current","visited","Set","currentWithDef","SAMPLE_BBOXES","injectBboxIntoSample","sample","locateFields","includeBbox","result","bboxIndex","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","findAllMidsceneLocatorField","sampleWithBbox","sampleStr","systemPromptToTaskPlanning","actionSpace","includeThought","includeSubGoals","preferredLanguage","getPreferredLanguage","Error","actionDescriptionList","actionList","shouldIncludeSubGoals","locateExample1","locateNameField","locateEmailField","step1Title","step1Description","explicitInstructionRule","thoughtTagDescription","subGoalTags","memoryStepNumber","checkGoalStepNumber","actionStepNumber"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;ACKA,MAAMI,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBD,cAAc;IAEvG,OAAO;AACT;AAEA,MAAME,qBAAqB;AAO3B,MAAMC,oBAAoB;AAc1B,MAAMC,oBAAoB;AAS1B,MAAMC,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAOA,MAAMC,gBAAoD;IACxD;QAAC;QAAI;QAAK;QAAK;KAAI;IACnB;QAAC;QAAK;QAAK;QAAK;KAAI;IACpB;QAAC;QAAK;QAAK;QAAK;KAAI;IACpB;QAAC;QAAI;QAAK;QAAK;KAAI;CACpB;AAED,MAAMC,uBAAuB,CAC3BC,QACAC,cACAC;IAEA,IAAI,CAACA,aAAa,OAAOF;IACzB,MAAMG,SAAS;QAAE,GAAGH,MAAM;IAAC;IAC3B,IAAII,YAAY;IAChB,KAAK,MAAMX,SAASQ,aAClB,IACEE,MAAM,CAACV,MAAM,IACb,AAAyB,YAAzB,OAAOU,MAAM,CAACV,MAAM,IACpBU,MAAM,CAACV,MAAM,CAAC,MAAM,EACpB;QACAU,MAAM,CAACV,MAAM,GAAG;YACd,GAAGU,MAAM,CAACV,MAAM;YAChB,MAAMK,aAAa,CAACM,YAAYN,cAAc,MAAM,CAAC;QACvD;QACAM;IACF;IAEF,OAAOD;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC,8BACAL,cAAc,KAAK;IAEnB,MAAMM,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAAC9B,KAAKY,MAAM,IAAIX,OAAO,OAAO,CAAC+B,OACxC,IAAIpB,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMqB,aACJ,AACE,cADF,OAAQrB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMsB,kBAAkBD,aAAa,GAAGjC,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMmC,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAexB,OAAOc;gBAGvC,MAAMW,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkB1B;gBAGtC,MAAM2B,eAAe5B,iBAAiBC;gBACtC,MAAM4B,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3Cd,WAAW,IAAI,CAACa;YAClB;YAIF,IAAIb,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACiB;oBAClBlB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEkB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAeN;YAChC,MAAMO,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBR;YAGtC,IAAIiB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBnB,OAAO,IAAI,CAACmB;QACd;IACF;IAGA,IAAItB,OAAO,MAAM,IAAI,AAAyB,YAAzB,OAAOA,OAAO,MAAM,EAAe;QACtD,MAAML,eAAe4B,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BvB,OAAO,WAAW;QACnE,MAAMwB,iBAAiB/B,qBACrBO,OAAO,MAAM,EACbL,cACAC;QAEF,MAAM6B,YAAY,CAAC,WAAW,EAAEvB,MAAMA,IAAI,aAAa,EAAEF,OAAO,IAAI,CAAC,gBAAgB,EAAEE,MAAMA,IAAI,qBAAqB,EAAEA,MAAMA,MAAMkB,KAAK,SAAS,CAACI,gBAAgB,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,EAAEtB,MAAMA,KAAK,EAAE,EAAE,EAAEA,MAAMA,IAAI,oBAAoB,CAAC;QAChPC,OAAO,IAAI,CAACsB;IACd;IAEA,OAAO,CAAC,EAAE,EAAEzB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAewB,2BAA2B,EAC/CC,WAAW,EACX9C,WAAW,EACXe,WAAW,EACXgC,cAAc,EACdC,eAAe,EAOhB;IACC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAG1B,IAAInC,eAAe,CAACf,aAClB,MAAM,IAAImD,MACR;IAIJ,MAAMC,wBAAwBN,YAAY,GAAG,CAAC,CAAC3B,SACtCD,qBACLC,QACApB,cAAcgB,cAAcf,cAAcmC,SAC1CpB;IAGJ,MAAMsC,aAAaD,sBAAsB,IAAI,CAAC;IAG9C,MAAME,wBAAwBN,mBAAmB;IAGjD,MAAMO,iBAAiBxC,cACnB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAGF,MAAMyC,kBAAkBzC,cACpB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAEF,MAAM0C,mBAAmB1C,cACrB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAMF,MAAM2C,aAAaJ,wBACf,uGACA;IAEJ,MAAMK,mBAAmBL,wBACrB,wNACA;IAEJ,MAAMM,0BAA0B;IAEhC,MAAMC,wBAAwBP,wBAC1B,CAAC;;;;AAIP,EAAEM,yBAAyB,GACrB,CAAC;;;;AAIP,EAAEA,yBAAyB;IAEzB,MAAME,cAAcR,wBAChB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAuDc,CAAC,GAChB;IAIJ,MAAMS,mBAAmB;IACzB,MAAMC,sBAAsBV,wBAAwB,IAAI;IACxD,MAAMW,mBAAmBX,wBAAwB,IAAI;IAErD,OAAO,CAAC;;;AAGV,EAAEI,WAAW;;AAEb,EAAEC,iBAAiB;AACnB,EAAEL,wBAAwB,CAAC,EAAE,EAAEpD,mBAAmB,EAAE,CAAC,GAAG,GAAG;;;AAG3D,EAAE2D,sBAAsB;AACxB,EAAEC,YAAY;AACd,EACER,wBACI,CAAC;QACC,EAAES,iBAAiB;;AAE3B,EAAE5D,kBAAkB;;;AAGpB,CAAC,GACK,GACL;QACO,EAAE6D,oBAAoB,EAAE,EAAEV,wBAAwB,kCAAkC,wCAAwC;;AAEpI,EAAEA,wBAAwB,+EAA+E,YAAY;;;;;;;;;;QAU7G,EAAEA,wBAAwB,sBAAsB,wBAAwB;MAC1E,EAAEA,wBAAwB,yBAAyB,2BAA2B;;;;wBAI5D,EAAEA,wBAAwB,sBAAsB,wBAAwB;6BACnE,EAAEA,wBAAwB,sBAAsB,wBAAwB;qCAChE,EAAEA,wBAAwB,sBAAsB,wBAAwB;4BACjF,EAAEA,wBAAwB,sBAAsB,wBAAwB;;;;;;;;;;;;;;;;;;;;;0NAqBsH,EAAEA,wBAAwB,aAAa,KAAK;;;;;;;;;;;;AAYtQ,EACE,CAACA,wBACG,CAAC;;;;;AAKP,CAAC,GACK,GACL;;;sEAGqE,EAAEW,iBAAiB;;yCAEhD,EAAEX,wBAAwB,+IAA+I,GAAG,wZAAwZ,EAAEA,wBAAwB,iEAAiE,uDAAuD;;;;QAIvvB,EAAEW,iBAAiB;;mGAEwE,EAAEX,wBAAwB,kBAAkB,GAAG;;;;;;;;AAQlJ,EAAElD,kBAAkB;;;;AAIpB,EAAEiD,WAAW;;;;;;QAML,EAAEJ,kBAAkB;;;;;;;;;;;;;;;;;;;;YAoBhB,EAAEM,eAAe;;;;;;;;;;;;;;;;;;;;oBAoBT,EAAED,wBAAwB,cAAc,GAAG;;AAE/D,EACEA,wBACI,CAAC;;;;;;;;AAQP,CAAC,GACK,KAEJA,wBACI,CAAC;UACG,EAAES,iBAAiB;;AAE7B,CAAC,GACK,GACL;;;iBAGgB,EAAET,wBAAwB,yBAAyB,2BAA2B,iBAAiB,EAAEU,oBAAoB;;;iBAGrH,EAAEV,wBAAwB,yBAAyB,+BAA+B,WAAW,EAAEW,iBAAiB;;;;;;;;AAQjI,EACEX,wBACI,CAAC;;;;;;;;;;;;;;;;;;;;;;;YAuBK,EAAEE,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAuDlB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoD/B,CAAC,GACK,CAAC;;;;;;;;;;;;;;;;;;YAkBK,EAAED,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAkClB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+B/B,CAAC,EACC;AACF"}
1
+ {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { findAllMidsceneLocatorField } from '@/common';\nimport type { DeviceAction } from '@/types';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { z } from 'zod';\nimport { planningModelFamilyRequiredForLocateMessage } from '../errors';\nimport type { LocateResultPromptSpec } from '../shared/model-locate-result';\nimport { locateGroundingRules } from './locate-grounding-rules';\nimport { locateParamExample } from './locate-param-example';\n\nconst locateParamSchemaDescription = (promptSpec?: LocateResultPromptSpec) => {\n if (promptSpec) {\n return `{${promptSpec.resultKey}: ${promptSpec.resultValueSchema}, prompt: string } // ${promptSpec.resultValueDescription}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nconst OBSERVE_STEP_NOTES = [\n '### Observation Guidelines',\n '',\n '- Treat visible summaries, thumbnails, cropped content, and partially visible lists as potentially incomplete when the task depends on precise details.',\n '- If the current view does not provide enough information to decide safely, use available UI affordances such as opening details, expanding content, previewing, enlarging, zooming, or scrolling before acting.',\n].join('\\n');\n\nconst MEMORY_STEP_NOTES = [\n 'Use `<memory>` to record clear, task-relevant information from the current screenshot that may be needed in later steps. The current screenshot will not be available later, so memory should preserve enough detail for future reasoning, verification, or action.',\n '',\n '- Record information completely and exactly as shown. Do not summarize, translate, normalize, or merge values that may matter later.',\n '- When recording an item, include the item itself, its exact task-relevant details, and the visible cue or UI context that identifies where it came from when relevant.',\n '- Keep similar or repeated items as separate memory entries unless their task-relevant details are confirmed to be the same.',\n '- After navigation, scrolling, editing, deletion, saving, or other screen changes, treat remembered positions, order, indexes, and UI bindings as references only. Re-check the current screen before acting on them.',\n '',\n 'Examples:',\n '- If you need to find an item and later assert its details, record the item name and the exact details needed for the assertion, such as status, price, date, owner, description, or other visible fields.',\n '- If you need to compare multiple similar results, record each candidate separately with its exact distinguishing details and visible context.',\n '- If you need to copy information from one place to another, record the exact source value and the target field or UI cue it should be mapped to.',\n].join('\\n');\n\nconst ACTION_STEP_NOTES = [\n '### Action Guidelines',\n '',\n '- When editing existing text in a UI field, preserve all existing text by moving the cursor and typing/deleting the minimal necessary characters, and use Input with mode \"typeOnly\" when typing new characters for such edits.',\n].join('\\n');\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\n/**\n * Inject model locate results into locate fields of a sample object.\n * Walks the sample and for any locate field (identified by paramSchema),\n * adds a fake locate result when includeLocateInPlanning is true.\n */\nconst injectLocateResultIntoSample = (\n sample: Record<string, any>,\n locateFields: string[],\n promptSpec: LocateResultPromptSpec,\n): Record<string, any> => {\n const resultKey = promptSpec.resultKey;\n const sampleResults = promptSpec.exampleValues;\n const result = { ...sample };\n let sampleResultIndex = 0;\n for (const field of locateFields) {\n if (\n result[field] &&\n typeof result[field] === 'object' &&\n result[field].prompt\n ) {\n result[field] = {\n ...result[field],\n [resultKey]: sampleResults[sampleResultIndex % sampleResults.length],\n };\n sampleResultIndex++;\n }\n }\n return result;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locateParamTypeDescription: string,\n includeLocateInPlanning = false,\n locatePromptSpec?: LocateResultPromptSpec,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locateParamTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n // Render sample if provided, using the same XML tag format as the real output\n if (action.sample && typeof action.sample === 'object') {\n const locateFields = findAllMidsceneLocatorField(action.paramSchema);\n const sampleWithLocateResult =\n includeLocateInPlanning && locatePromptSpec\n ? injectLocateResultIntoSample(\n action.sample,\n locateFields,\n locatePromptSpec,\n )\n : action.sample;\n const sampleStr = `- sample:\\n${tab}${tab}<action-type>${action.name}</action-type>\\n${tab}${tab}<action-param-json>\\n${tab}${tab}${JSON.stringify(sampleWithLocateResult, null, 2).replace(/\\n/g, `\\n${tab}${tab}`)}\\n${tab}${tab}</action-param-json>`;\n fields.push(sampleStr);\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n locatePromptSpec,\n includeLocateInPlanning,\n includeThought,\n includeSubGoals,\n}: {\n actionSpace: DeviceAction<any>[];\n locatePromptSpec?: LocateResultPromptSpec;\n includeLocateInPlanning: boolean;\n includeThought?: boolean;\n includeSubGoals?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n if (includeLocateInPlanning && !locatePromptSpec) {\n throw new Error(planningModelFamilyRequiredForLocateMessage());\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n locateParamSchemaDescription(\n includeLocateInPlanning ? locatePromptSpec : undefined,\n ),\n includeLocateInPlanning,\n locatePromptSpec,\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const shouldIncludeThought = includeThought ?? true;\n const shouldIncludeSubGoals = includeSubGoals ?? false;\n\n const locateExample = (prompt: string, exampleValueIndex: number) =>\n locateParamExample(\n prompt,\n includeLocateInPlanning ? locatePromptSpec : undefined,\n locatePromptSpec?.exampleValues[exampleValueIndex] ??\n locatePromptSpec?.exampleValues[0],\n );\n const locateExample1 = locateExample(\n 'Add to cart button for Sauce Labs Backpack',\n 1,\n );\n const locateNameField = locateExample(\n 'Name input field in the registration form',\n 2,\n );\n const locateEmailField = locateExample(\n 'Email input field in the registration form',\n 3,\n );\n\n const thoughtTag = (content: string) =>\n shouldIncludeThought ? `<thought>${content}</thought>\\n` : '';\n\n // Sub-goals related content - only included when shouldIncludeSubGoals is true\n const step1Title = shouldIncludeSubGoals\n ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)'\n : '## Step 1: Observe (related tags: <thought>)';\n\n const step1Description = shouldIncludeSubGoals\n ? \"First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot.\"\n : 'First, observe the current screenshot and previous logs to understand the current state.';\n\n const explicitInstructionRule = `CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: \"fill out the form\" means only fill fields, do NOT submit; \"click the button\" means only click, do NOT wait for page load or verify results; \"type 'hello'\" means only type, do NOT press Enter.`;\n\n const thoughtTagDescription = shouldIncludeSubGoals\n ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the user's requirement? What is the current state based on the screenshot? Are all sub-goals completed? If not, what should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`\n : `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the current state based on the screenshot? What should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`;\n\n const subGoalTags = shouldIncludeSubGoals\n ? `\n\n* <update-plan-content> tag\n\nUse this structure to give or update your plan:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished|pending\">sub goal description</sub-goal>\n <sub-goal index=\"2\" status=\"finished|pending\">sub goal description</sub-goal>\n ...\n</update-plan-content>\n\n* <mark-sub-goal-done> tag\n\nUse this structure to mark a sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nIMPORTANT: You MUST only mark a sub-goal as \"finished\" AFTER you have confirmed the task is actually completed by observing the result in the screenshot. Do NOT mark a sub-goal as done just because you expect the next action will complete it. Wait until you see visual confirmation in the screenshot that the sub-goal has been achieved.\n\n* Note\n\nDuring execution, you can call <update-plan-content> at any time to update the plan based on the latest screenshot and completed sub-goals.\n\n### Example\n\nIf the user wants to \"log in to a system using username and password, complete all to-do items, and submit a registration form\", you can break it down into the following sub-goals:\n\n<thought>...</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Log in to the system</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Complete all to-do items</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Submit the registration form</sub-goal>\n</update-plan-content>\n\nAfter logging in and seeing the to-do items, you can mark the sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nAt this point, the status of all sub-goals is:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished\" />\n <sub-goal index=\"2\" status=\"pending\" />\n <sub-goal index=\"3\" status=\"pending\" />\n</update-plan-content>\n\nAfter some time, when the last sub-goal is also completed, you can mark it as done as well:\n\n<mark-sub-goal-done>\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>`\n : '';\n\n // Step numbering adjusts based on whether sub-goals are included\n // When includeSubGoals=false, memory step is skipped\n const memoryStepNumber = 2; // Only used when shouldIncludeSubGoals is true\n const checkGoalStepNumber = shouldIncludeSubGoals ? 3 : 2;\n const actionStepNumber = shouldIncludeSubGoals ? 4 : 3;\n\n return `\nTarget: You are an expert to manipulate the UI to accomplish the user's instruction. User will give you an instruction, some screenshots, background knowledge and previous logs indicating what have been done. Your task is to accomplish the instruction by thinking through the path to complete the task and give the next action to execute.\n\n${step1Title}\n\n${step1Description}\n${shouldIncludeSubGoals ? `\\n${OBSERVE_STEP_NOTES}\\n` : ''}\n* <thought> tag (REQUIRED)\n\n${thoughtTagDescription}\n${subGoalTags}\n${\n shouldIncludeSubGoals\n ? `\n## Step ${memoryStepNumber}: Memory Data from Current Screenshot (related tags: <memory>)\n\n${MEMORY_STEP_NOTES}\n\nDon't use this tag if no information needs to be preserved.\n`\n : ''\n}\n## Step ${checkGoalStepNumber}: ${shouldIncludeSubGoals ? 'Check if Goal is Accomplished' : 'Check if the Instruction is Fulfilled'} (related tags: <complete>)\n\n${shouldIncludeSubGoals ? 'Based on the current screenshot and the status of all sub-goals, determine' : 'Determine'} if the entire task is completed.\n\n### CRITICAL: The User's Instruction is the Supreme Authority\n\nThe user's instruction defines the EXACT scope of what you must accomplish. You MUST follow it precisely - nothing more, nothing less. Violating this rule may cause severe consequences such as data loss, unintended operations, or system failures.\n\n**Explicit instructions vs. High-level goals:**\n- If the user gives you **explicit operation steps** (e.g., \"click X\", \"type Y\", \"fill out the form\"), treat them as exact commands. Execute ONLY those steps, nothing more.\n- If the user gives you a **high-level goal** (e.g., \"log in to the system\", \"complete the purchase\"), you may determine the necessary steps to achieve it.\n\n**What \"${shouldIncludeSubGoals ? 'goal accomplished' : 'instruction fulfilled'}\" means:**\n- The ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} when you have done EXACTLY what the user asked - no extra steps, no assumptions.\n- Do NOT perform any action beyond the explicit instruction, even if it seems logical or helpful.\n\n**Examples - Explicit instructions (execute exactly, no extra steps):**\n- \"fill out the form\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when all fields are filled. Do NOT submit the form.\n- \"click the login button\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} once clicked. Do NOT wait for page load or verify login success.\n- \"type 'hello' in the search box\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when 'hello' is typed. Do NOT press Enter or trigger search.\n- \"select the first item\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when selected. Do NOT proceed to checkout.\n\n**Special case - Scrollable option lists and dropdowns:**\n- When choosing an item from a scrollable select, dropdown, listbox, menu, or similar option list, first open the control if it is closed. Once the list is open, interact with the list itself, not the page.\n- If the target option is visible in the open list, Tap that exact option immediately.\n- If the list is open but the target option is not visible, try to find it by scrolling the open list/dropdown before giving up or interacting with other elements.\n- For an open dropdown/list, prefer small incremental Scroll actions with an explicit distance (typically 50-120 pixels) and a locate target describing the open list/dropdown. Do NOT omit distance while searching within a list, because the default scroll distance can skip over relevant options and cause oscillation.\n- While searching within an open dropdown/list, use short scrolls so intermediate options are not skipped.\n- After selecting the target, if the trigger text or result text shows the requested option, treat the current selection step as fulfilled and continue evaluating the remaining user instruction.\n\n**Special case - Text hidden by a narrow input field:**\n- CRITICAL PRIORITY OVERRIDE - Input verification after an input action:\n- This rule overrides the general requirement to verify the exact target text from the screenshot.\n- If the previous step already executed an input action, and the current input field is not empty, you MUST directly treat that input as successful.\n- In this situation, do NOT use the visible text in the screenshot to decide that the input is incomplete, incorrect, missing characters, out of order, or needs correction.\n- Do NOT perform ClearInput, another Input, KeyboardPress, or any other action intended to \"fix the input content\" just because the visible text looks different from the target text, has abnormal character order, missing characters, abnormal spacing, suspicious visual recognition results, or appears to have selected/highlighted text.\n- The general rule \"do EXACTLY what the user asked\" still applies to the intended input value you execute, but it MUST NOT be enforced by re-validating the visible text in the screenshot after the input action.\n- Differences in visible text must be assumed to be caused by clipping, horizontal scrolling, narrow input fields, text selection, caret position, or visual recognition errors rather than input failure.\n- Retry input only when the input field is clearly still empty, or when the page shows an explicit error message.\n\n**Special case - Assertion instructions:**\n- If the user's instruction includes an assertion (e.g., \"verify that...\", \"check that...\", \"assert...\"), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, mark ${shouldIncludeSubGoals ? 'the goal' : 'it'} as failed (success=\"false\").\n- If the page is still loading (e.g., you see a loading spinner, skeleton screen, or progress bar), do NOT assert yet. Wait for the page to finish loading before evaluating the assertion.\n\n### Completion Criteria for Process-required Instructions\n\nIf the user's instruction includes explicit operation steps, ordering requirements, or action requirements, it is a process-required instruction.\n\nFor process-required instructions, do NOT treat the task as complete only because the current screenshot already shows the final expected state. Do NOT infer that earlier steps were executed from the final UI state.\n\nYou may output <complete success=\"true\"> only when the current execution history, previous logs, or the screenshot after the most recent action proves that every explicit step required by the user has been completed, and the final check condition is also satisfied.\n\nIf any explicit step lacks completion evidence in the current execution history, continue with the next missing step instead of outputting <complete>, even if the current screenshot appears to satisfy the final condition.\n${\n !shouldIncludeSubGoals\n ? `\n**Page navigation restriction:**\n- Unless the user's instruction explicitly asks you to click a link, jump to another page, or navigate to a URL, you MUST complete the task on the current page only.\n- Do NOT navigate away from the current page on your own initiative (e.g., do not click links that lead to other pages, do not use browser back/forward, do not open new URLs).\n- If the task cannot be accomplished on the current page and the user has not instructed you to navigate, report it as a failure (success=\"false\") instead of attempting to navigate to other pages.\n`\n : ''\n}\n### Output Rules\n\n- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.\n- Use the <complete success=\"true|false\">message</complete> tag to output the result if the goal is accomplished or failed.\n - the 'success' attribute is required. ${shouldIncludeSubGoals ? 'It means whether the expected goal is accomplished based on what you observe in the current screenshot and the current execution history. ' : ''}No matter what errors occurred during execution, set success=\"true\" only when the current execution history shows that all steps required by the user have been completed and the final state satisfies the requirement. If the user asks for explicit operation steps or an ordered workflow, do not treat those steps as completed only because the current screenshot already shows the final expected state. If the ${shouldIncludeSubGoals ? 'expected goal is not accomplished and cannot be accomplished' : 'instruction is not fulfilled and cannot be fulfilled'}, set success=\"false\".\n - the 'message' is the information that will be provided to the user. If the user asks for a specific format, strictly follow that.\n- If you output <complete>, do NOT output <action-type> or <action-param-json>. The task ends here.\n\n## Step ${actionStepNumber}: Determine Next Action (related tags: <log>, <action-type>, <action-param-json>, <error>)\n\nONLY if the task is not complete: Think what the next action is according to the current screenshot${shouldIncludeSubGoals ? ' and the plan' : ''}.\n\n- Don't give extra actions or plans beyond the instruction or the plan. For example, don't try to submit the form if the instruction is only to fill something.\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully. Otherwise, retry or do something else to recover.\n- Give just the next ONE action you should do (if any)\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 3 times, you should think this is an error and set the \"error\" field to the error message.\n\n${ACTION_STEP_NOTES}\n\n${\n includeLocateInPlanning\n ? `${locateGroundingRules()}\n\n`\n : ''\n}### Supporting actions list\n\n${actionList}\n\n### Log to give user feedback (preamble message)\n\nThe <log> tag is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- <log>Click the login button</log>\n- <log>Scroll to find the 'Yes' button in popup</log>\n- <log>Previous actions failed to find the 'Yes' button, i will try again</log>\n- <log>Go back to find the login button</log>\n\n### If there is some action to do ...\n\n- Use the <action-type> and <action-param-json> tags to output the action to be executed.\n- The <action-type> MUST be one of the supporting actions. 'complete' is NOT a valid action-type.\n- Parameter names are strict. Use EXACTLY the field names listed for the selected action. Do NOT invent alias fields. If an action has a \"sample\" in its description, follow that structure.\nFor example:\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateExample1}\n}\n</action-param-json>\n\n### If you think there is an error ...\n\n- Use the <error> tag to output the error message.\n\nFor example:\n<error>Unable to find the required element on the page</error>\n\n### If there is no action to do ...\n\n- Don't output <action-type> or <action-param-json> if there is no action to do.\n\n## Return Format\n\nReturn in XML format following this decision flow:\n\n**Always include (REQUIRED):**\n<!-- Step 1: Observe${shouldIncludeSubGoals ? ' and Plan' : ''} -->\n<thought>Your thought process here. NEVER skip this tag.</thought>\n${\n shouldIncludeSubGoals\n ? `\n<!-- required when no update-plan-content is provided in the previous response -->\n<update-plan-content>...</update-plan-content>\n\n<!-- required when any sub-goal is completed -->\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n`\n : ''\n}${\n shouldIncludeSubGoals\n ? `\n<!-- Step ${memoryStepNumber}: Memory data from current screenshot if needed -->\n<memory>...</memory>\n`\n : ''\n}\n**Then choose ONE of the following paths:**\n\n**Path A: If the ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} or failed (Step ${checkGoalStepNumber})**\n<complete success=\"true|false\">...</complete>\n\n**Path B: If the ${shouldIncludeSubGoals ? 'goal is NOT complete' : 'instruction is NOT fulfilled'} yet (Step ${actionStepNumber})**\n<!-- Determine next action -->\n<log>...</log>\n<action-type>...</action-type>\n<action-param-json>...</action-param-json>\n\n<!-- OR if there's an error -->\n<error>...</error>\n${\n shouldIncludeSubGoals\n ? `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I'll break this down into sub-goals and start with the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Fill in the Name field with 'John'</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Fill in the Email field with 'john@example.com'</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Return the filled email address</sub-goal>\n</update-plan-content>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. Current sub-goal is running, will be completed after input.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n- Typing 'John' into the Name field\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Sub-goal 1 is complete. Next, I need to fill the Email field for sub-goal 2.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n<memory>Name field has been filled with 'John'</memory>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, sub-goal 2 will be complete and my task will be done.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Goal accomplished)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n- Typing email address into the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. Sub-goal 2 is complete. The user asked me to return the filled email address, so I need to include 'john@example.com' in my response. All sub-goals are now finished.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"2\" status=\"finished\" />\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>\n<complete success=\"true\">john@example.com</complete>\n`\n : `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I should start by clicking on the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. After this, I'll need to fill the Email field as well.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Next, I need to fill the Email field with 'john@example.com'. I'll click on the Email field first.</thought>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, both required fields will be filled and my task will be complete.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Instruction fulfilled)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. The user asked me to return the filled email address, so I should include 'john@example.com' in my response. The instruction has been fulfilled.</thought>\n<complete success=\"true\">john@example.com</complete>\n`\n}`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","locateParamSchemaDescription","promptSpec","OBSERVE_STEP_NOTES","MEMORY_STEP_NOTES","ACTION_STEP_NOTES","findDefaultValue","field","current","visited","Set","currentWithDef","injectLocateResultIntoSample","sample","locateFields","resultKey","sampleResults","result","sampleResultIndex","descriptionForAction","action","locateParamTypeDescription","includeLocateInPlanning","locatePromptSpec","tab","fields","paramLines","schema","isZodObject","shape","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","findAllMidsceneLocatorField","sampleWithLocateResult","sampleStr","systemPromptToTaskPlanning","actionSpace","includeThought","includeSubGoals","preferredLanguage","getPreferredLanguage","Error","planningModelFamilyRequiredForLocateMessage","actionDescriptionList","actionList","shouldIncludeSubGoals","locateExample","prompt","exampleValueIndex","locateParamExample","locateExample1","locateNameField","locateEmailField","step1Title","step1Description","explicitInstructionRule","thoughtTagDescription","subGoalTags","memoryStepNumber","checkGoalStepNumber","actionStepNumber","locateGroundingRules"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACOA,MAAMI,+BAA+B,CAACC;IACpC,IAAIA,YACF,OAAO,CAAC,CAAC,EAAEA,WAAW,SAAS,CAAC,EAAE,EAAEA,WAAW,iBAAiB,CAAC,sBAAsB,EAAEA,WAAW,sBAAsB,EAAE;IAE9H,OAAO;AACT;AAEA,MAAMC,qBAAqB;AAO3B,MAAMC,oBAAoB;AAc1B,MAAMC,oBAAoB;AAS1B,MAAMC,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAOA,MAAMC,+BAA+B,CACnCC,QACAC,cACAZ;IAEA,MAAMa,YAAYb,WAAW,SAAS;IACtC,MAAMc,gBAAgBd,WAAW,aAAa;IAC9C,MAAMe,SAAS;QAAE,GAAGJ,MAAM;IAAC;IAC3B,IAAIK,oBAAoB;IACxB,KAAK,MAAMX,SAASO,aAClB,IACEG,MAAM,CAACV,MAAM,IACb,AAAyB,YAAzB,OAAOU,MAAM,CAACV,MAAM,IACpBU,MAAM,CAACV,MAAM,CAAC,MAAM,EACpB;QACAU,MAAM,CAACV,MAAM,GAAG;YACd,GAAGU,MAAM,CAACV,MAAM;YAChB,CAACQ,UAAU,EAAEC,aAAa,CAACE,oBAAoBF,cAAc,MAAM,CAAC;QACtE;QACAE;IACF;IAEF,OAAOD;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC,4BACAC,0BAA0B,KAAK,EAC/BC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEL,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMM,aAAuB,EAAE;QAG/B,MAAMC,SAASP,OAAO,WAAW;QAIjC,MAAMQ,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAAC/B,KAAKW,MAAM,IAAIV,OAAO,OAAO,CAACgC,OACxC,IAAItB,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMuB,aACJ,AACE,cADF,OAAQvB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMwB,kBAAkBD,aAAa,GAAGlC,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMoC,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAe1B,OAAOc;gBAGvC,MAAMa,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkB5B;gBAGtC,MAAM6B,eAAe9B,iBAAiBC;gBACtC,MAAM8B,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3Cd,WAAW,IAAI,CAACa;YAClB;YAIF,IAAIb,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACiB;oBAClBlB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEkB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAeN;YAChC,MAAMO,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBR;YAGtC,IAAIiB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBnB,OAAO,IAAI,CAACmB;QACd;IACF;IAGA,IAAIxB,OAAO,MAAM,IAAI,AAAyB,YAAzB,OAAOA,OAAO,MAAM,EAAe;QACtD,MAAMN,eAAe+B,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BzB,OAAO,WAAW;QACnE,MAAM0B,yBACJxB,2BAA2BC,mBACvBX,6BACEQ,OAAO,MAAM,EACbN,cACAS,oBAEFH,OAAO,MAAM;QACnB,MAAM2B,YAAY,CAAC,WAAW,EAAEvB,MAAMA,IAAI,aAAa,EAAEJ,OAAO,IAAI,CAAC,gBAAgB,EAAEI,MAAMA,IAAI,qBAAqB,EAAEA,MAAMA,MAAMkB,KAAK,SAAS,CAACI,wBAAwB,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,EAAEtB,MAAMA,KAAK,EAAE,EAAE,EAAEA,MAAMA,IAAI,oBAAoB,CAAC;QACxPC,OAAO,IAAI,CAACsB;IACd;IAEA,OAAO,CAAC,EAAE,EAAE3B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEI,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAewB,2BAA2B,EAC/CC,WAAW,EACX1B,gBAAgB,EAChBD,uBAAuB,EACvB4B,cAAc,EACdC,eAAe,EAOhB;IACC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAE1B,IAAI/B,2BAA2B,CAACC,kBAC9B,MAAM,IAAI+B,MAAMC,AAAAA,IAAAA,mCAAAA,2CAAAA,AAAAA;IAGlB,MAAMC,wBAAwBP,YAAY,GAAG,CAAC,CAAC7B,SACtCD,qBACLC,QACAnB,6BACEqB,0BAA0BC,mBAAmBe,SAE/ChB,yBACAC;IAGJ,MAAMkC,aAAaD,sBAAsB,IAAI,CAAC;IAG9C,MAAME,wBAAwBP,mBAAmB;IAEjD,MAAMQ,gBAAgB,CAACC,QAAgBC,oBACrCC,AAAAA,IAAAA,iDAAAA,kBAAAA,AAAAA,EACEF,QACAtC,0BAA0BC,mBAAmBe,QAC7Cf,kBAAkB,aAAa,CAACsC,kBAAkB,IAChDtC,kBAAkB,aAAa,CAAC,EAAE;IAExC,MAAMwC,iBAAiBJ,cACrB,8CACA;IAEF,MAAMK,kBAAkBL,cACtB,6CACA;IAEF,MAAMM,mBAAmBN,cACvB,8CACA;IAOF,MAAMO,aAAaR,wBACf,uGACA;IAEJ,MAAMS,mBAAmBT,wBACrB,wNACA;IAEJ,MAAMU,0BAA0B;IAEhC,MAAMC,wBAAwBX,wBAC1B,CAAC;;;;AAIP,EAAEU,yBAAyB,GACrB,CAAC;;;;AAIP,EAAEA,yBAAyB;IAEzB,MAAME,cAAcZ,wBAChB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAuDc,CAAC,GAChB;IAIJ,MAAMa,mBAAmB;IACzB,MAAMC,sBAAsBd,wBAAwB,IAAI;IACxD,MAAMe,mBAAmBf,wBAAwB,IAAI;IAErD,OAAO,CAAC;;;AAGV,EAAEQ,WAAW;;AAEb,EAAEC,iBAAiB;AACnB,EAAET,wBAAwB,CAAC,EAAE,EAAEvD,mBAAmB,EAAE,CAAC,GAAG,GAAG;;;AAG3D,EAAEkE,sBAAsB;AACxB,EAAEC,YAAY;AACd,EACEZ,wBACI,CAAC;QACC,EAAEa,iBAAiB;;AAE3B,EAAEnE,kBAAkB;;;AAGpB,CAAC,GACK,GACL;QACO,EAAEoE,oBAAoB,EAAE,EAAEd,wBAAwB,kCAAkC,wCAAwC;;AAEpI,EAAEA,wBAAwB,+EAA+E,YAAY;;;;;;;;;;QAU7G,EAAEA,wBAAwB,sBAAsB,wBAAwB;MAC1E,EAAEA,wBAAwB,yBAAyB,2BAA2B;;;;wBAI5D,EAAEA,wBAAwB,sBAAsB,wBAAwB;6BACnE,EAAEA,wBAAwB,sBAAsB,wBAAwB;qCAChE,EAAEA,wBAAwB,sBAAsB,wBAAwB;4BACjF,EAAEA,wBAAwB,sBAAsB,wBAAwB;;;;;;;;;;;;;;;;;;;;;0NAqBsH,EAAEA,wBAAwB,aAAa,KAAK;;;;;;;;;;;;AAYtQ,EACE,CAACA,wBACG,CAAC;;;;;AAKP,CAAC,GACK,GACL;;;sEAGqE,EAAEe,iBAAiB;;yCAEhD,EAAEf,wBAAwB,+IAA+I,GAAG,wZAAwZ,EAAEA,wBAAwB,iEAAiE,uDAAuD;;;;QAIvvB,EAAEe,iBAAiB;;mGAEwE,EAAEf,wBAAwB,kBAAkB,GAAG;;;;;;;;AAQlJ,EAAErD,kBAAkB;;AAEpB,EACEiB,0BACI,GAAGoD,AAAAA,IAAAA,mDAAAA,oBAAAA,AAAAA,IAAuB;;AAEhC,CAAC,GACK,GACL;;AAED,EAAEjB,WAAW;;;;;;QAML,EAAEL,kBAAkB;;;;;;;;;;;;;;;;;;;;YAoBhB,EAAEW,eAAe;;;;;;;;;;;;;;;;;;;;oBAoBT,EAAEL,wBAAwB,cAAc,GAAG;;AAE/D,EACEA,wBACI,CAAC;;;;;;;;AAQP,CAAC,GACK,KAEJA,wBACI,CAAC;UACG,EAAEa,iBAAiB;;AAE7B,CAAC,GACK,GACL;;;iBAGgB,EAAEb,wBAAwB,yBAAyB,2BAA2B,iBAAiB,EAAEc,oBAAoB;;;iBAGrH,EAAEd,wBAAwB,yBAAyB,+BAA+B,WAAW,EAAEe,iBAAiB;;;;;;;;AAQjI,EACEf,wBACI,CAAC;;;;;;;;;;;;;;;;;;;;;;;YAuBK,EAAEM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAuDlB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoD/B,CAAC,GACK,CAAC;;;;;;;;;;;;;;;;;;YAkBK,EAAED,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAkClB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+B/B,CAAC,EACC;AACF"}
@@ -28,10 +28,15 @@ __webpack_require__.d(__webpack_exports__, {
28
28
  systemPromptToLocateSection: ()=>systemPromptToLocateSection
29
29
  });
30
30
  const env_namespaceObject = require("@midscene/shared/env");
31
- const external_common_js_namespaceObject = require("./common.js");
32
- function systemPromptToLocateSection(modelFamily) {
31
+ const external_locate_param_example_js_namespaceObject = require("./locate-param-example.js");
32
+ function systemPromptToLocateSection(promptSpec) {
33
33
  const preferredLanguage = (0, env_namespaceObject.getPreferredLanguage)();
34
- const bboxFormat = (0, external_common_js_namespaceObject.bboxDescription)(modelFamily);
34
+ const resultKey = promptSpec.resultKey;
35
+ const exampleValueText = (0, external_locate_param_example_js_namespaceObject.formatLocateExampleValue)(promptSpec.exampleValues[0]);
36
+ const resultJsonProperty = `"${resultKey}": ${promptSpec.resultValueSchema}, // ${promptSpec.resultValueDescription}`;
37
+ const resultValueType = promptSpec.resultValueSchema;
38
+ const resultFieldDescription = `${promptSpec.resultNoun} of the section containing the target element`;
39
+ const referenceFieldDescription = `Optional array of ${promptSpec.resultNounPlural} of reference elements`;
35
40
  return `
36
41
  ## Role:
37
42
  You are an AI assistant that helps identify UI elements.
@@ -43,9 +48,9 @@ You are an AI assistant that helps identify UI elements.
43
48
  ## Output Format:
44
49
  \`\`\`json
45
50
  {
46
- "bbox": [number, number, number, number], // ${bboxFormat}
47
- "references_bbox"?: [
48
- [number, number, number, number],
51
+ ${resultJsonProperty}
52
+ "references_${resultKey}"?: [
53
+ ${resultValueType},
49
54
  ...
50
55
  ],
51
56
  "error"?: string
@@ -53,16 +58,16 @@ You are an AI assistant that helps identify UI elements.
53
58
  \`\`\`
54
59
 
55
60
  Fields:
56
- * \`bbox\` - Bounding box of the section containing the target element
57
- * \`references_bbox\` - Optional array of bounding boxes for reference elements
61
+ * \`${resultKey}\` - ${resultFieldDescription}
62
+ * \`references_${resultKey}\` - ${referenceFieldDescription}
58
63
  * \`error\` - Optional error message if the section cannot be found. Use ${preferredLanguage}.
59
64
 
60
65
  Example:
61
66
  If the description is "delete button on the second row with title 'Peter'", return:
62
67
  \`\`\`json
63
68
  {
64
- "bbox": [100, 100, 200, 200],
65
- "references_bbox": [[100, 100, 200, 200]]
69
+ "${resultKey}": ${exampleValueText},
70
+ "references_${resultKey}": [${exampleValueText}]
66
71
  }
67
72
  \`\`\`
68
73
  `;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-section-locator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-section-locator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\n\nexport function systemPromptToLocateSection(\n modelFamily: TModelFamily | undefined,\n) {\n const preferredLanguage = getPreferredLanguage();\n const bboxFormat = bboxDescription(modelFamily);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Find a section containing the target element\n- If the description mentions reference elements, also locate sections containing those references\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxFormat}\n \"references_bbox\"?: [\n [number, number, number, number],\n ...\n ],\n \"error\"?: string\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` - Bounding box of the section containing the target element\n* \\`references_bbox\\` - Optional array of bounding boxes for reference elements\n* \\`error\\` - Optional error message if the section cannot be found. Use ${preferredLanguage}.\n\nExample:\nIf the description is \"delete button on the second row with title 'Peter'\", return:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"references_bbox\": [[100, 100, 200, 200]]\n}\n\\`\\`\\`\n`;\n}\n\nexport const sectionLocatorInstruction = (sectionDescription: string) =>\n `Find section containing: ${sectionDescription}`;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","systemPromptToLocateSection","modelFamily","preferredLanguage","getPreferredLanguage","bboxFormat","bboxDescription","sectionLocatorInstruction","sectionDescription"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACFO,SAASI,4BACdC,WAAqC;IAErC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAC1B,MAAMC,aAAaC,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBJ;IACnC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEG,WAAW;;;;;;;;;;;;yEAYY,EAAEF,kBAAkB;;;;;;;;;;AAU7F,CAAC;AACD;AAEO,MAAMI,4BAA4B,CAACC,qBACxC,CAAC,yBAAyB,EAAEA,oBAAoB"}
1
+ {"version":3,"file":"ai-model/prompt/llm-section-locator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-section-locator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { getPreferredLanguage } from '@midscene/shared/env';\nimport type { LocateResultPromptSpec } from '../shared/model-locate-result';\nimport { formatLocateExampleValue } from './locate-param-example';\n\nexport function systemPromptToLocateSection(\n promptSpec: LocateResultPromptSpec,\n) {\n const preferredLanguage = getPreferredLanguage();\n const resultKey = promptSpec.resultKey;\n const exampleValueText = formatLocateExampleValue(\n promptSpec.exampleValues[0],\n );\n const resultJsonProperty = `\"${resultKey}\": ${promptSpec.resultValueSchema}, // ${promptSpec.resultValueDescription}`;\n const resultValueType = promptSpec.resultValueSchema;\n const resultFieldDescription = `${promptSpec.resultNoun} of the section containing the target element`;\n const referenceFieldDescription = `Optional array of ${promptSpec.resultNounPlural} of reference elements`;\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Find a section containing the target element\n- If the description mentions reference elements, also locate sections containing those references\n\n## Output Format:\n\\`\\`\\`json\n{\n ${resultJsonProperty}\n \"references_${resultKey}\"?: [\n ${resultValueType},\n ...\n ],\n \"error\"?: string\n}\n\\`\\`\\`\n\nFields:\n* \\`${resultKey}\\` - ${resultFieldDescription}\n* \\`references_${resultKey}\\` - ${referenceFieldDescription}\n* \\`error\\` - Optional error message if the section cannot be found. Use ${preferredLanguage}.\n\nExample:\nIf the description is \"delete button on the second row with title 'Peter'\", return:\n\\`\\`\\`json\n{\n \"${resultKey}\": ${exampleValueText},\n \"references_${resultKey}\": [${exampleValueText}]\n}\n\\`\\`\\`\n`;\n}\n\nexport const sectionLocatorInstruction = (sectionDescription: string) =>\n `Find section containing: ${sectionDescription}`;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","systemPromptToLocateSection","promptSpec","preferredLanguage","getPreferredLanguage","resultKey","exampleValueText","formatLocateExampleValue","resultJsonProperty","resultValueType","resultFieldDescription","referenceFieldDescription","sectionLocatorInstruction","sectionDescription"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACFO,SAASI,4BACdC,UAAkC;IAElC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAC1B,MAAMC,YAAYH,WAAW,SAAS;IACtC,MAAMI,mBAAmBC,AAAAA,IAAAA,iDAAAA,wBAAAA,AAAAA,EACvBL,WAAW,aAAa,CAAC,EAAE;IAE7B,MAAMM,qBAAqB,CAAC,CAAC,EAAEH,UAAU,GAAG,EAAEH,WAAW,iBAAiB,CAAC,MAAM,EAAEA,WAAW,sBAAsB,EAAE;IACtH,MAAMO,kBAAkBP,WAAW,iBAAiB;IACpD,MAAMQ,yBAAyB,GAAGR,WAAW,UAAU,CAAC,6CAA6C,CAAC;IACtG,MAAMS,4BAA4B,CAAC,kBAAkB,EAAET,WAAW,gBAAgB,CAAC,sBAAsB,CAAC;IAC1G,OAAO,CAAC;;;;;;;;;;;EAWR,EAAEM,mBAAmB;cACT,EAAEH,UAAU;IACtB,EAAEI,gBAAgB;;;;;;;;IAQlB,EAAEJ,UAAU,KAAK,EAAEK,uBAAuB;eAC/B,EAAEL,UAAU,KAAK,EAAEM,0BAA0B;yEACa,EAAER,kBAAkB;;;;;;GAM1F,EAAEE,UAAU,GAAG,EAAEC,iBAAiB;cACvB,EAAED,UAAU,IAAI,EAAEC,iBAAiB;;;AAGjD,CAAC;AACD;AAEO,MAAMM,4BAA4B,CAACC,qBACxC,CAAC,yBAAyB,EAAEA,oBAAoB"}
@@ -0,0 +1,43 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ locateGroundingRules: ()=>locateGroundingRules
28
+ });
29
+ function locateGroundingRules() {
30
+ return `## Important Notes for Locating Elements:
31
+ - When the user describes an element that contains text (such as buttons, input fields, dropdown options, radio buttons, etc.), you should locate ONLY the text region of that element, not the entire element boundary.
32
+ - For example: If an input field is large (both wide and tall) with a placeholder text "Please enter your comment", you should locate only the area where the placeholder text appears, not the entire input field.
33
+ - This principle applies to all text-containing elements: focus on the visible text region rather than the full element container.`;
34
+ }
35
+ exports.locateGroundingRules = __webpack_exports__.locateGroundingRules;
36
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
37
+ "locateGroundingRules"
38
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
39
+ Object.defineProperty(exports, '__esModule', {
40
+ value: true
41
+ });
42
+
43
+ //# sourceMappingURL=locate-grounding-rules.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/prompt/locate-grounding-rules.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/locate-grounding-rules.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","export function locateGroundingRules() {\n return `## Important Notes for Locating Elements:\n- When the user describes an element that contains text (such as buttons, input fields, dropdown options, radio buttons, etc.), you should locate ONLY the text region of that element, not the entire element boundary.\n- For example: If an input field is large (both wide and tall) with a placeholder text \"Please enter your comment\", you should locate only the area where the placeholder text appears, not the entire input field.\n- This principle applies to all text-containing elements: focus on the visible text region rather than the full element container.`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","locateGroundingRules"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;ACNO,SAASI;IACd,OAAO,CAAC;;;kIAGwH,CAAC;AACnI"}
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ formatLocateExampleValue: ()=>formatLocateExampleValue,
28
+ locateParamExample: ()=>locateParamExample
29
+ });
30
+ function formatLocateExampleValue(value) {
31
+ return Array.isArray(value) ? `[${value.join(', ')}]` : JSON.stringify(value);
32
+ }
33
+ function locateParamExample(prompt, promptSpec, exampleValue) {
34
+ if (!promptSpec) return `{
35
+ "prompt": ${JSON.stringify(prompt)}
36
+ }`;
37
+ return `{
38
+ "prompt": ${JSON.stringify(prompt)},
39
+ "${promptSpec.resultKey}": ${formatLocateExampleValue(exampleValue ?? promptSpec.exampleValues[0])}
40
+ }`;
41
+ }
42
+ exports.formatLocateExampleValue = __webpack_exports__.formatLocateExampleValue;
43
+ exports.locateParamExample = __webpack_exports__.locateParamExample;
44
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
45
+ "formatLocateExampleValue",
46
+ "locateParamExample"
47
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
48
+ Object.defineProperty(exports, '__esModule', {
49
+ value: true
50
+ });
51
+
52
+ //# sourceMappingURL=locate-param-example.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/prompt/locate-param-example.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/locate-param-example.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { LocateResultPromptSpec } from '../shared/model-locate-result';\n\nexport function formatLocateExampleValue(value: unknown): string {\n return Array.isArray(value) ? `[${value.join(', ')}]` : JSON.stringify(value);\n}\n\nexport function locateParamExample(\n prompt: string,\n promptSpec?: LocateResultPromptSpec,\n exampleValue?: unknown,\n): string {\n if (!promptSpec) {\n return `{\n \"prompt\": ${JSON.stringify(prompt)}\n }`;\n }\n\n return `{\n \"prompt\": ${JSON.stringify(prompt)},\n \"${promptSpec.resultKey}\": ${formatLocateExampleValue(\n exampleValue ?? promptSpec.exampleValues[0],\n )}\n }`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","formatLocateExampleValue","value","Array","JSON","locateParamExample","prompt","promptSpec","exampleValue"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;ACJO,SAASI,yBAAyBC,KAAc;IACrD,OAAOC,MAAM,OAAO,CAACD,SAAS,CAAC,CAAC,EAAEA,MAAM,IAAI,CAAC,MAAM,CAAC,CAAC,GAAGE,KAAK,SAAS,CAACF;AACzE;AAEO,SAASG,mBACdC,MAAc,EACdC,UAAmC,EACnCC,YAAsB;IAEtB,IAAI,CAACD,YACH,OAAO,CAAC;cACE,EAAEH,KAAK,SAAS,CAACE,QAAQ;GACpC,CAAC;IAGF,OAAO,CAAC;cACI,EAAEF,KAAK,SAAS,CAACE,QAAQ;KAClC,EAAEC,WAAW,SAAS,CAAC,GAAG,EAAEN,yBAC3BO,gBAAgBD,WAAW,aAAa,CAAC,EAAE,EAC3C;GACH,CAAC;AACJ"}
@@ -38,7 +38,7 @@ __webpack_require__.d(__webpack_exports__, {
38
38
  const constants_namespaceObject = require("@midscene/shared/constants");
39
39
  const external_index_js_namespaceObject = require("../index.js");
40
40
  const external_yaml_generator_js_namespaceObject = require("./yaml-generator.js");
41
- const generatePlaywrightTest = async (events, options, modelConfig)=>{
41
+ const generatePlaywrightTest = async (events, options, modelRuntime)=>{
42
42
  (0, external_yaml_generator_js_namespaceObject.validateEvents)(events);
43
43
  const summary = (0, external_yaml_generator_js_namespaceObject.prepareEventSummary)(events, {
44
44
  testName: options.testName,
@@ -84,11 +84,11 @@ ${constants_namespaceObject.PLAYWRIGHT_EXAMPLE_CODE}`;
84
84
  content: messageContent
85
85
  }
86
86
  ];
87
- const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelConfig);
87
+ const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelRuntime);
88
88
  if (response?.content && 'string' == typeof response.content) return response.content;
89
89
  throw new Error('Failed to generate Playwright test code');
90
90
  };
91
- const generatePlaywrightTestStream = async (events, options, modelConfig)=>{
91
+ const generatePlaywrightTestStream = async (events, options, modelRuntime)=>{
92
92
  (0, external_yaml_generator_js_namespaceObject.validateEvents)(events);
93
93
  const summary = (0, external_yaml_generator_js_namespaceObject.prepareEventSummary)(events, {
94
94
  testName: options.testName,
@@ -135,12 +135,12 @@ ${constants_namespaceObject.PLAYWRIGHT_EXAMPLE_CODE}`;
135
135
  content: messageContent
136
136
  }
137
137
  ];
138
- if (options.stream && options.onChunk) return await (0, external_index_js_namespaceObject.callAI)(prompt, modelConfig, {
138
+ if (options.stream && options.onChunk) return await (0, external_index_js_namespaceObject.callAI)(prompt, modelRuntime, {
139
139
  stream: true,
140
140
  onChunk: options.onChunk
141
141
  });
142
142
  {
143
- const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelConfig);
143
+ const response = await (0, external_index_js_namespaceObject.callAIWithStringResponse)(prompt, modelRuntime);
144
144
  if (response?.content && 'string' == typeof response.content) return {
145
145
  content: response.content,
146
146
  usage: response.usage,