@midscene/core 1.8.11 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304) hide show
  1. package/dist/es/agent/agent.mjs +40 -50
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +39 -19
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +24 -22
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +11 -14
  8. package/dist/es/agent/utils.mjs.map +1 -1
  9. package/dist/es/ai-model/connectivity.mjs +7 -3
  10. package/dist/es/ai-model/connectivity.mjs.map +1 -1
  11. package/dist/es/ai-model/errors.mjs +9 -0
  12. package/dist/es/ai-model/errors.mjs.map +1 -0
  13. package/dist/es/ai-model/index.mjs +3 -4
  14. package/dist/es/ai-model/inspect.mjs +132 -144
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +46 -28
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/{auto-glm → models/auto-glm}/actions.mjs +22 -44
  19. package/dist/es/ai-model/models/auto-glm/actions.mjs.map +1 -0
  20. package/dist/es/ai-model/models/auto-glm/adapter.mjs +45 -0
  21. package/dist/es/ai-model/models/auto-glm/adapter.mjs.map +1 -0
  22. package/dist/es/ai-model/models/auto-glm/locate.mjs +112 -0
  23. package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -0
  24. package/dist/es/ai-model/models/auto-glm/parser.mjs.map +1 -0
  25. package/dist/es/ai-model/{auto-glm → models/auto-glm}/planning.mjs +6 -7
  26. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -0
  27. package/dist/es/ai-model/{auto-glm → models/auto-glm}/prompt.mjs +3 -11
  28. package/dist/es/ai-model/models/auto-glm/prompt.mjs.map +1 -0
  29. package/dist/es/ai-model/models/default.mjs +12 -0
  30. package/dist/es/ai-model/models/default.mjs.map +1 -0
  31. package/dist/es/ai-model/models/doubao.mjs +138 -0
  32. package/dist/es/ai-model/models/doubao.mjs.map +1 -0
  33. package/dist/es/ai-model/models/gemini.mjs +34 -0
  34. package/dist/es/ai-model/models/gemini.mjs.map +1 -0
  35. package/dist/es/ai-model/models/glm.mjs +37 -0
  36. package/dist/es/ai-model/models/glm.mjs.map +1 -0
  37. package/dist/es/ai-model/models/gpt.mjs +31 -0
  38. package/dist/es/ai-model/models/gpt.mjs.map +1 -0
  39. package/dist/es/ai-model/models/index.mjs +2 -0
  40. package/dist/es/ai-model/models/qwen.mjs +113 -0
  41. package/dist/es/ai-model/models/qwen.mjs.map +1 -0
  42. package/dist/es/ai-model/models/registry.mjs +45 -0
  43. package/dist/es/ai-model/models/registry.mjs.map +1 -0
  44. package/dist/es/ai-model/models/resolved.mjs +104 -0
  45. package/dist/es/ai-model/models/resolved.mjs.map +1 -0
  46. package/dist/es/ai-model/models/types.mjs +0 -0
  47. package/dist/es/ai-model/models/ui-tars/adapter.mjs +142 -0
  48. package/dist/es/ai-model/models/ui-tars/adapter.mjs.map +1 -0
  49. package/dist/es/ai-model/{ui-tars-planning.mjs → models/ui-tars/planning.mjs} +44 -62
  50. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/extraction.mjs +3 -3
  52. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  53. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -11
  54. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  55. package/dist/es/ai-model/prompt/llm-planning.mjs +25 -60
  56. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  57. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -10
  58. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  59. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs +9 -0
  60. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs.map +1 -0
  61. package/dist/es/ai-model/prompt/locate-param-example.mjs +15 -0
  62. package/dist/es/ai-model/prompt/locate-param-example.mjs.map +1 -0
  63. package/dist/es/ai-model/prompt/playwright-generator.mjs +5 -5
  64. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  65. package/dist/es/ai-model/prompt/yaml-generator.mjs +5 -5
  66. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  67. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs +107 -0
  68. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs.map +1 -0
  69. package/dist/es/ai-model/service-caller/index.mjs +59 -190
  70. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  71. package/dist/es/ai-model/service-caller/json.mjs +60 -0
  72. package/dist/es/ai-model/service-caller/json.mjs.map +1 -0
  73. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs +68 -0
  74. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs.map +1 -0
  75. package/dist/es/ai-model/shared/model-locate-result/factory.mjs +96 -0
  76. package/dist/es/ai-model/shared/model-locate-result/factory.mjs.map +1 -0
  77. package/dist/es/ai-model/shared/model-locate-result/index.mjs +3 -0
  78. package/dist/es/ai-model/shared/model-locate-result/parse.mjs +41 -0
  79. package/dist/es/ai-model/shared/model-locate-result/parse.mjs.map +1 -0
  80. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs +64 -0
  81. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs.map +1 -0
  82. package/dist/es/ai-model/shared/model-locate-result/types.mjs +0 -0
  83. package/dist/es/ai-model/types.mjs +0 -0
  84. package/dist/es/ai-model/workflows/image-preprocess.mjs +27 -0
  85. package/dist/es/ai-model/workflows/image-preprocess.mjs.map +1 -0
  86. package/dist/es/ai-model/workflows/inspect/index.mjs +2 -0
  87. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs +23 -0
  88. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs.map +1 -0
  89. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs +18 -0
  90. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs.map +1 -0
  91. package/dist/es/ai-model/workflows/inspect/types.mjs +0 -0
  92. package/dist/es/ai-model/workflows/planning/index.mjs +5 -0
  93. package/dist/es/ai-model/workflows/planning/index.mjs.map +1 -0
  94. package/dist/es/ai-model/workflows/planning/types.mjs +0 -0
  95. package/dist/es/common.mjs +2 -174
  96. package/dist/es/common.mjs.map +1 -1
  97. package/dist/es/device/index.mjs.map +1 -1
  98. package/dist/es/service/index.mjs +96 -69
  99. package/dist/es/service/index.mjs.map +1 -1
  100. package/dist/es/types.mjs.map +1 -1
  101. package/dist/es/utils.mjs +2 -2
  102. package/dist/es/yaml/player.mjs +4 -3
  103. package/dist/es/yaml/player.mjs.map +1 -1
  104. package/dist/lib/agent/agent.js +43 -53
  105. package/dist/lib/agent/agent.js.map +1 -1
  106. package/dist/lib/agent/task-builder.js +38 -18
  107. package/dist/lib/agent/task-builder.js.map +1 -1
  108. package/dist/lib/agent/tasks.js +23 -21
  109. package/dist/lib/agent/tasks.js.map +1 -1
  110. package/dist/lib/agent/utils.js +17 -17
  111. package/dist/lib/agent/utils.js.map +1 -1
  112. package/dist/lib/ai-model/connectivity.js +7 -3
  113. package/dist/lib/ai-model/connectivity.js.map +1 -1
  114. package/dist/lib/ai-model/errors.js +46 -0
  115. package/dist/lib/ai-model/errors.js.map +1 -0
  116. package/dist/lib/ai-model/index.js +7 -14
  117. package/dist/lib/ai-model/inspect.js +141 -144
  118. package/dist/lib/ai-model/inspect.js.map +1 -1
  119. package/dist/lib/ai-model/llm-planning.js +44 -26
  120. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  121. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/actions.js +22 -44
  122. package/dist/lib/ai-model/models/auto-glm/actions.js.map +1 -0
  123. package/dist/lib/ai-model/models/auto-glm/adapter.js +79 -0
  124. package/dist/lib/ai-model/models/auto-glm/adapter.js.map +1 -0
  125. package/dist/lib/ai-model/models/auto-glm/locate.js +146 -0
  126. package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -0
  127. package/dist/lib/ai-model/models/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/planning.js +8 -9
  129. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/prompt.js +14 -16
  131. package/dist/lib/ai-model/models/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/{auto-glm/util.js → models/default.js} +13 -13
  133. package/dist/lib/ai-model/models/default.js.map +1 -0
  134. package/dist/lib/ai-model/models/doubao.js +184 -0
  135. package/dist/lib/ai-model/models/doubao.js.map +1 -0
  136. package/dist/lib/ai-model/models/gemini.js +68 -0
  137. package/dist/lib/ai-model/models/gemini.js.map +1 -0
  138. package/dist/lib/ai-model/models/glm.js +71 -0
  139. package/dist/lib/ai-model/models/glm.js.map +1 -0
  140. package/dist/lib/ai-model/models/gpt.js +65 -0
  141. package/dist/lib/ai-model/models/gpt.js.map +1 -0
  142. package/dist/lib/ai-model/{service-caller/image-detail.js → models/index.js} +8 -7
  143. package/dist/lib/ai-model/models/index.js.map +1 -0
  144. package/dist/lib/ai-model/models/qwen.js +147 -0
  145. package/dist/lib/ai-model/models/qwen.js.map +1 -0
  146. package/dist/lib/ai-model/models/registry.js +85 -0
  147. package/dist/lib/ai-model/models/registry.js.map +1 -0
  148. package/dist/lib/ai-model/models/resolved.js +138 -0
  149. package/dist/lib/ai-model/models/resolved.js.map +1 -0
  150. package/dist/lib/ai-model/models/types.js +20 -0
  151. package/dist/lib/ai-model/models/types.js.map +1 -0
  152. package/dist/lib/ai-model/models/ui-tars/adapter.js +176 -0
  153. package/dist/lib/ai-model/models/ui-tars/adapter.js.map +1 -0
  154. package/dist/lib/ai-model/{ui-tars-planning.js → models/ui-tars/planning.js} +44 -62
  155. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/extraction.js +3 -3
  157. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  158. package/dist/lib/ai-model/prompt/llm-locator.js +11 -11
  159. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  160. package/dist/lib/ai-model/prompt/llm-planning.js +25 -60
  161. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  162. package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -10
  163. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  164. package/dist/lib/ai-model/prompt/locate-grounding-rules.js +43 -0
  165. package/dist/lib/ai-model/prompt/locate-grounding-rules.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/locate-param-example.js +52 -0
  167. package/dist/lib/ai-model/prompt/locate-param-example.js.map +1 -0
  168. package/dist/lib/ai-model/prompt/playwright-generator.js +5 -5
  169. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  170. package/dist/lib/ai-model/prompt/yaml-generator.js +5 -5
  171. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  172. package/dist/lib/ai-model/prompts/locate-result-coordinates.js +150 -0
  173. package/dist/lib/ai-model/prompts/locate-result-coordinates.js.map +1 -0
  174. package/dist/lib/ai-model/service-caller/index.js +68 -199
  175. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  176. package/dist/lib/ai-model/service-caller/json.js +100 -0
  177. package/dist/lib/ai-model/service-caller/json.js.map +1 -0
  178. package/dist/lib/ai-model/shared/model-locate-result/bbox.js +117 -0
  179. package/dist/lib/ai-model/shared/model-locate-result/bbox.js.map +1 -0
  180. package/dist/lib/ai-model/shared/model-locate-result/factory.js +130 -0
  181. package/dist/lib/ai-model/shared/model-locate-result/factory.js.map +1 -0
  182. package/dist/lib/ai-model/{prompt/common.js → shared/model-locate-result/index.js} +9 -9
  183. package/dist/lib/ai-model/shared/model-locate-result/index.js.map +1 -0
  184. package/dist/lib/ai-model/shared/model-locate-result/parse.js +78 -0
  185. package/dist/lib/ai-model/shared/model-locate-result/parse.js.map +1 -0
  186. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js +98 -0
  187. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js.map +1 -0
  188. package/dist/lib/ai-model/shared/model-locate-result/types.js +20 -0
  189. package/dist/lib/ai-model/shared/model-locate-result/types.js.map +1 -0
  190. package/dist/lib/ai-model/types.js +20 -0
  191. package/dist/lib/ai-model/types.js.map +1 -0
  192. package/dist/lib/ai-model/workflows/image-preprocess.js +61 -0
  193. package/dist/lib/ai-model/workflows/image-preprocess.js.map +1 -0
  194. package/dist/lib/ai-model/workflows/inspect/index.js +50 -0
  195. package/dist/lib/ai-model/workflows/inspect/index.js.map +1 -0
  196. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js +60 -0
  197. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js.map +1 -0
  198. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js +52 -0
  199. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js.map +1 -0
  200. package/dist/lib/ai-model/workflows/inspect/types.js +20 -0
  201. package/dist/lib/ai-model/workflows/inspect/types.js.map +1 -0
  202. package/dist/lib/ai-model/{model-family.js → workflows/planning/index.js} +6 -7
  203. package/dist/lib/ai-model/workflows/planning/index.js.map +1 -0
  204. package/dist/lib/ai-model/workflows/planning/types.js +20 -0
  205. package/dist/lib/ai-model/workflows/planning/types.js.map +1 -0
  206. package/dist/lib/common.js +4 -206
  207. package/dist/lib/common.js.map +1 -1
  208. package/dist/lib/device/index.js.map +1 -1
  209. package/dist/lib/service/index.js +96 -69
  210. package/dist/lib/service/index.js.map +1 -1
  211. package/dist/lib/types.js.map +1 -1
  212. package/dist/lib/utils.js +2 -2
  213. package/dist/lib/yaml/player.js +4 -3
  214. package/dist/lib/yaml/player.js.map +1 -1
  215. package/dist/types/agent/agent.d.ts +14 -6
  216. package/dist/types/agent/task-builder.d.ts +2 -2
  217. package/dist/types/agent/tasks.d.ts +6 -6
  218. package/dist/types/agent/utils.d.ts +8 -5
  219. package/dist/types/ai-model/errors.d.ts +2 -0
  220. package/dist/types/ai-model/index.d.ts +2 -4
  221. package/dist/types/ai-model/inspect.d.ts +13 -33
  222. package/dist/types/ai-model/llm-planning.d.ts +6 -17
  223. package/dist/types/ai-model/{auto-glm → models/auto-glm}/actions.d.ts +2 -2
  224. package/dist/types/ai-model/models/auto-glm/adapter.d.ts +5 -0
  225. package/dist/types/ai-model/models/auto-glm/locate.d.ts +3 -0
  226. package/dist/types/ai-model/models/auto-glm/planning.d.ts +3 -0
  227. package/dist/types/ai-model/models/auto-glm/prompt.d.ts +4 -0
  228. package/dist/types/ai-model/models/default.d.ts +2 -0
  229. package/dist/types/ai-model/models/doubao.d.ts +10 -0
  230. package/dist/types/ai-model/models/gemini.d.ts +18 -0
  231. package/dist/types/ai-model/models/glm.d.ts +18 -0
  232. package/dist/types/ai-model/models/gpt.d.ts +18 -0
  233. package/dist/types/ai-model/models/index.d.ts +2 -0
  234. package/dist/types/ai-model/models/qwen.d.ts +30 -0
  235. package/dist/types/ai-model/models/registry.d.ts +81 -0
  236. package/dist/types/ai-model/models/resolved.d.ts +9 -0
  237. package/dist/types/ai-model/models/types.d.ts +102 -0
  238. package/dist/types/ai-model/models/ui-tars/adapter.d.ts +6 -0
  239. package/dist/types/ai-model/{ui-tars-planning.d.ts → models/ui-tars/planning.d.ts} +7 -11
  240. package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
  241. package/dist/types/ai-model/prompt/llm-planning.d.ts +5 -5
  242. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
  243. package/dist/types/ai-model/prompt/locate-grounding-rules.d.ts +1 -0
  244. package/dist/types/ai-model/prompt/locate-param-example.d.ts +3 -0
  245. package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -3
  246. package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -3
  247. package/dist/types/ai-model/prompts/locate-result-coordinates.d.ts +6 -0
  248. package/dist/types/ai-model/service-caller/index.d.ts +19 -27
  249. package/dist/types/ai-model/service-caller/json.d.ts +9 -0
  250. package/dist/types/ai-model/shared/model-locate-result/bbox.d.ts +7 -0
  251. package/dist/types/ai-model/shared/model-locate-result/factory.d.ts +2 -0
  252. package/dist/types/ai-model/shared/model-locate-result/index.d.ts +3 -0
  253. package/dist/types/ai-model/shared/model-locate-result/parse.d.ts +5 -0
  254. package/dist/types/ai-model/shared/model-locate-result/pixel-bbox-mapper.d.ts +7 -0
  255. package/dist/types/ai-model/shared/model-locate-result/types.d.ts +157 -0
  256. package/dist/types/ai-model/types.d.ts +2 -0
  257. package/dist/types/ai-model/workflows/image-preprocess.d.ts +30 -0
  258. package/dist/types/ai-model/workflows/inspect/index.d.ts +1 -0
  259. package/dist/types/ai-model/workflows/inspect/locate-result-rect.d.ts +4 -0
  260. package/dist/types/ai-model/workflows/inspect/search-area-mapping.d.ts +3 -0
  261. package/dist/types/ai-model/workflows/inspect/types.d.ts +37 -0
  262. package/dist/types/ai-model/workflows/planning/index.d.ts +2 -0
  263. package/dist/types/ai-model/workflows/planning/types.d.ts +15 -0
  264. package/dist/types/common.d.ts +0 -30
  265. package/dist/types/device/index.d.ts +22 -22
  266. package/dist/types/service/index.d.ts +5 -4
  267. package/dist/types/types.d.ts +21 -9
  268. package/dist/types/yaml.d.ts +8 -2
  269. package/package.json +2 -2
  270. package/dist/es/ai-model/auto-glm/actions.mjs.map +0 -1
  271. package/dist/es/ai-model/auto-glm/index.mjs +0 -6
  272. package/dist/es/ai-model/auto-glm/parser.mjs.map +0 -1
  273. package/dist/es/ai-model/auto-glm/planning.mjs.map +0 -1
  274. package/dist/es/ai-model/auto-glm/prompt.mjs.map +0 -1
  275. package/dist/es/ai-model/auto-glm/util.mjs +0 -9
  276. package/dist/es/ai-model/auto-glm/util.mjs.map +0 -1
  277. package/dist/es/ai-model/model-family.mjs +0 -6
  278. package/dist/es/ai-model/model-family.mjs.map +0 -1
  279. package/dist/es/ai-model/prompt/common.mjs +0 -8
  280. package/dist/es/ai-model/prompt/common.mjs.map +0 -1
  281. package/dist/es/ai-model/service-caller/image-detail.mjs +0 -6
  282. package/dist/es/ai-model/service-caller/image-detail.mjs.map +0 -1
  283. package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
  284. package/dist/lib/ai-model/auto-glm/actions.js.map +0 -1
  285. package/dist/lib/ai-model/auto-glm/index.js +0 -66
  286. package/dist/lib/ai-model/auto-glm/index.js.map +0 -1
  287. package/dist/lib/ai-model/auto-glm/parser.js.map +0 -1
  288. package/dist/lib/ai-model/auto-glm/planning.js.map +0 -1
  289. package/dist/lib/ai-model/auto-glm/prompt.js.map +0 -1
  290. package/dist/lib/ai-model/auto-glm/util.js.map +0 -1
  291. package/dist/lib/ai-model/model-family.js.map +0 -1
  292. package/dist/lib/ai-model/prompt/common.js.map +0 -1
  293. package/dist/lib/ai-model/service-caller/image-detail.js.map +0 -1
  294. package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
  295. package/dist/types/ai-model/auto-glm/index.d.ts +0 -6
  296. package/dist/types/ai-model/auto-glm/planning.d.ts +0 -12
  297. package/dist/types/ai-model/auto-glm/prompt.d.ts +0 -27
  298. package/dist/types/ai-model/auto-glm/util.d.ts +0 -13
  299. package/dist/types/ai-model/model-family.d.ts +0 -7
  300. package/dist/types/ai-model/prompt/common.d.ts +0 -2
  301. package/dist/types/ai-model/service-caller/image-detail.d.ts +0 -2
  302. /package/dist/es/ai-model/{auto-glm → models/auto-glm}/parser.mjs +0 -0
  303. /package/dist/lib/ai-model/{auto-glm → models/auto-glm}/parser.js +0 -0
  304. /package/dist/types/ai-model/{auto-glm → models/auto-glm}/parser.d.ts +0 -0
@@ -0,0 +1,176 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ uiTarsAdapters: ()=>uiTarsAdapters
28
+ });
29
+ const env_namespaceObject = require("@midscene/shared/env");
30
+ const utils_namespaceObject = require("@midscene/shared/utils");
31
+ const external_jsonrepair_namespaceObject = require("jsonrepair");
32
+ const json_js_namespaceObject = require("../../service-caller/json.js");
33
+ const index_js_namespaceObject = require("../../shared/model-locate-result/index.js");
34
+ const external_planning_js_namespaceObject = require("./planning.js");
35
+ const defaultVlmUiTarsReplanningCycleLimit = 40;
36
+ function normalizeJsonObject(obj, context = {}) {
37
+ if (null == obj) return obj;
38
+ if (Array.isArray(obj)) return obj.map((item)=>normalizeJsonObject(item, context));
39
+ if ('object' == typeof obj) {
40
+ const normalized = {};
41
+ for (const [key, value] of Object.entries(obj)){
42
+ const trimmedKey = key.trim();
43
+ const preserveStringValue = context.preserveStringValueKeys?.includes(trimmedKey) ?? false;
44
+ const normalizedValue = 'string' == typeof value ? preserveStringValue ? value : value.trim() : normalizeJsonObject(value, context);
45
+ normalized[trimmedKey] = normalizedValue;
46
+ }
47
+ return normalized;
48
+ }
49
+ return 'string' == typeof obj ? obj.trim() : obj;
50
+ }
51
/**
 * Tells whether a failed JSON parse should be retried through the repair path.
 *
 * @param source - The parser source tag taken from the parser context.
 * @returns `true` only for 'locate', 'section-locator' and
 *   'planning-action-param'; `false` for every other value.
 */
function shouldRepairUiTarsLocateJson(source) {
    switch (source) {
        case 'locate':
        case 'section-locator':
        case 'planning-action-param':
            return true;
        default:
            return false;
    }
}
54
/**
 * Turns whitespace-separated numbers into comma-separated ones so that a
 * response such as `[12 34 56 78]` becomes parseable JSON.
 *
 * The rewrite is applied only when the text contains the substring 'bbox';
 * otherwise the input is returned untouched. Every run of whitespace that sits
 * directly between two digits is replaced by a single comma.
 *
 * @param input - Raw JSON-like text.
 * @returns The text with digit-whitespace-digit gaps replaced by commas.
 */
function preprocessUiTarsLocateJson(input) {
    if (!input.includes('bbox')) return input;
    // The lookahead leaves the following digit unconsumed, so chained numbers
    // ("1 2 3") are all handled in one pass.
    return input.replace(/(\d)\s+(?=\d)/g, '$1,');
}
58
/**
 * JSON parser used by the UI-TARS adapters.
 *
 * First tries `safeParseJson`. If that throws and the context source is one
 * accepted by `shouldRepairUiTarsLocateJson`, the raw text is passed through
 * `extractJSONFromCodeBlock`, `preprocessUiTarsLocateJson` and `jsonrepair`,
 * parsed with `JSON.parse`, and finally normalized with `normalizeJsonObject`.
 *
 * @param raw - Raw model response text.
 * @param context - Parser context; defaults to `{ source: 'generic-object' }`.
 * @returns The parsed value.
 * @throws The original parse error when the source does not qualify for
 *   repair, or an `Error` describing the failure when the repair path fails.
 */
const uiTarsJsonParser = (raw, context = { source: 'generic-object' }) => {
    const parserSource = context.source;
    let strictFailure;
    try {
        return (0, json_js_namespaceObject.safeParseJson)(raw, context);
    } catch (caught) {
        strictFailure = caught;
    }

    // Only locate-style payloads go through the repair path.
    if (!shouldRepairUiTarsLocateJson(parserSource)) throw strictFailure;

    const candidate = preprocessUiTarsLocateJson((0, json_js_namespaceObject.extractJSONFromCodeBlock)(raw));
    try {
        const repaired = (0, external_jsonrepair_namespaceObject.jsonrepair)(candidate);
        return normalizeJsonObject(JSON.parse(repaired), context);
    } catch (repairFailure) {
        throw Error(`failed to parse LLM response into JSON. Error - ${String(repairFailure ?? strictFailure ?? 'unknown error')}. Response - \n ${raw}`);
    }
};
74
+ function parseUiTarsRawLocateValue(input) {
75
+ const bbox = (0, index_js_namespaceObject.unwrapCoordinateListLikeInput)(input);
76
+ if ('string' == typeof bbox) {
77
+ (0, utils_namespaceObject.assert)(/^(\d+)\s(\d+)\s(\d+)\s(\d+)$/.test(bbox.trim()), `invalid bbox data string for ui-tars mode: ${bbox}`);
78
+ const splitted = bbox.split(' ');
79
+ if (4 === splitted.length) return {
80
+ type: 'bbox',
81
+ coordinates: [
82
+ Number(splitted[0]),
83
+ Number(splitted[1]),
84
+ Number(splitted[2]),
85
+ Number(splitted[3])
86
+ ]
87
+ };
88
+ throw new Error(`invalid bbox data string for ui-tars mode: ${bbox}`);
89
+ }
90
+ let bboxList = [];
91
+ if (Array.isArray(bbox) && 'string' == typeof bbox[0]) bbox.forEach((item)=>{
92
+ if ('string' == typeof item && item.includes(',')) {
93
+ const [x, y] = item.split(',');
94
+ bboxList.push(Number(x.trim()), Number(y.trim()));
95
+ } else if ('string' == typeof item && item.includes(' ')) {
96
+ const [x, y] = item.split(' ');
97
+ bboxList.push(Number(x.trim()), Number(y.trim()));
98
+ } else bboxList.push(Number(item));
99
+ });
100
+ else bboxList = bbox;
101
+ if (4 === bboxList.length || 5 === bboxList.length) return {
102
+ type: 'bbox',
103
+ coordinates: [
104
+ bboxList[0],
105
+ bboxList[1],
106
+ bboxList[2],
107
+ bboxList[3]
108
+ ]
109
+ };
110
+ if (6 === bboxList.length || 2 === bboxList.length || 3 === bboxList.length || 7 === bboxList.length) return {
111
+ type: 'point',
112
+ coordinates: [
113
+ bboxList[0],
114
+ bboxList[1]
115
+ ]
116
+ };
117
+ if (8 === bbox.length) return {
118
+ type: 'bbox',
119
+ coordinates: [
120
+ bboxList[0],
121
+ bboxList[1],
122
+ bboxList[4],
123
+ bboxList[5]
124
+ ]
125
+ };
126
+ const msg = `invalid bbox data for ui-tars mode: ${JSON.stringify(bbox)} `;
127
+ throw new Error(msg);
128
+ }
129
/**
 * Builds the model adapter definition for one UI-TARS model version.
 *
 * The returned object wires together:
 * - `jsonParser`: `uiTarsJsonParser`.
 * - `chatCompletion`: lists the reasoning-related user settings as unsupported
 *   and forwards only `temperature` (user value, else the Midscene default).
 * - `planning`: a custom planner that delegates to `uiTarsPlanning` with the
 *   given model version; caching is off and the default replanning cycle limit
 *   is `defaultVlmUiTarsReplanningCycleLimit`.
 * - `locate`: bbox results in x/y order normalized by 1000, parsed by
 *   `parseUiTarsRawLocateValue`.
 *
 * @param uiTarsModelVersion - Version value handed to `uiTarsPlanning`.
 * @returns A fresh adapter definition object.
 */
function createUiTarsAdapter(uiTarsModelVersion) {
    const chatCompletion = {
        unsupportedUserConfig: ['reasoningEnabled', 'reasoningEffort', 'reasoningBudget'],
        buildChatCompletionParams: ({ midsceneDefaults, userConfig }) => {
            const temperature = userConfig.temperature ?? midsceneDefaults.temperature;
            return { config: { temperature } };
        }
    };

    const planning = {
        kind: 'custom',
        cacheEnabled: false,
        defaultReplanningCycleLimit: defaultVlmUiTarsReplanningCycleLimit,
        planFn: (userInstruction, options) => (0, external_planning_js_namespaceObject.uiTarsPlanning)(userInstruction, options, uiTarsModelVersion)
    };

    const locate = {
        resultAdapter: {
            coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },
            parseRawLocateValue: parseUiTarsRawLocateValue
        }
    };

    return {
        jsonParser: uiTarsJsonParser,
        chatCompletion,
        planning,
        locate
    };
}
162
// One adapter instance is shared by both Doubao family names below.
const uiTarsDoubao15Adapter = createUiTarsAdapter(env_namespaceObject.UITarsModelVersion.DOUBAO_1_5_20B);
const uiTarsV1Adapter = createUiTarsAdapter(env_namespaceObject.UITarsModelVersion.V1_0);

/** Adapter definitions keyed by UI-TARS model family name. */
const uiTarsAdapters = {
    'vlm-ui-tars': uiTarsV1Adapter,
    'vlm-ui-tars-doubao': uiTarsDoubao15Adapter,
    'vlm-ui-tars-doubao-1.5': uiTarsDoubao15Adapter
};
168
+ exports.uiTarsAdapters = __webpack_exports__.uiTarsAdapters;
169
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
170
+ "uiTarsAdapters"
171
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
172
+ Object.defineProperty(exports, '__esModule', {
173
+ value: true
174
+ });
175
+
176
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/ui-tars/adapter.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/ui-tars/adapter.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { type TModelFamily, UITarsModelVersion } from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport {\n extractJSONFromCodeBlock,\n safeParseJson,\n} from '../../service-caller/json';\nimport {\n type LocateResultValue,\n unwrapCoordinateListLikeInput,\n} from '../../shared/model-locate-result';\nimport type {\n JsonParserContext,\n JsonParserSource,\n ModelAdapterDefinition,\n} from '../types';\nimport { uiTarsPlanning } from './planning';\n\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nfunction normalizeJsonObject(\n obj: any,\n context: Pick<JsonParserContext, 'preserveStringValueKeys'> = {},\n): any {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item, context));\n }\n\n if (typeof obj === 'object') {\n const normalized: any = {};\n for (const [key, value] of Object.entries(obj)) {\n const trimmedKey = key.trim();\n const preserveStringValue =\n 
context.preserveStringValueKeys?.includes(trimmedKey) ?? false;\n const normalizedValue =\n typeof value === 'string'\n ? preserveStringValue\n ? value\n : value.trim()\n : normalizeJsonObject(value, context);\n normalized[trimmedKey] = normalizedValue;\n }\n return normalized;\n }\n\n return typeof obj === 'string' ? obj.trim() : obj;\n}\n\nfunction shouldRepairUiTarsLocateJson(source: JsonParserSource) {\n return (\n source === 'locate' ||\n source === 'section-locator' ||\n source === 'planning-action-param'\n );\n}\n\nfunction preprocessUiTarsLocateJson(input: string) {\n if (input.includes('bbox')) {\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nconst uiTarsJsonParser: ModelAdapterDefinition['jsonParser'] = (\n raw,\n context = { source: 'generic-object' },\n) => {\n const { source } = context;\n try {\n return safeParseJson(raw, context);\n } catch (firstError) {\n if (!shouldRepairUiTarsLocateJson(source)) {\n throw firstError;\n }\n\n const jsonString = preprocessUiTarsLocateJson(\n extractJSONFromCodeBlock(raw),\n );\n try {\n return normalizeJsonObject(JSON.parse(jsonrepair(jsonString)), context);\n } catch (error) {\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n error ?? firstError ?? 'unknown error',\n )}. Response - \\n ${raw}`,\n );\n }\n }\n};\n\n// UI-TARS has not received active updates for a long time, so this parser is\n// intentionally kept separate from Doubao even though the current logic is the\n// same. 
This avoids coupling UI-TARS behavior to future Doubao adapter changes.\nfunction parseUiTarsRawLocateValue(input: unknown): LocateResultValue {\n const bbox = unwrapCoordinateListLikeInput(input as any);\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for ui-tars mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return {\n type: 'bbox',\n coordinates: [\n Number(splitted[0]),\n Number(splitted[1]),\n Number(splitted[2]),\n Number(splitted[3]),\n ],\n };\n }\n throw new Error(`invalid bbox data string for ui-tars mode: ${bbox}`);\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as number[];\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return {\n type: 'bbox',\n coordinates: [bboxList[0], bboxList[1], bboxList[2], bboxList[3]],\n };\n }\n\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return { type: 'point', coordinates: [bboxList[0], bboxList[1]] };\n }\n\n if (bbox.length === 8) {\n return {\n type: 'bbox',\n coordinates: [bboxList[0], bboxList[1], bboxList[4], bboxList[5]],\n };\n }\n\n const msg = `invalid bbox data for ui-tars mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nfunction createUiTarsAdapter(\n uiTarsModelVersion: UITarsModelVersion,\n): ModelAdapterDefinition {\n return {\n jsonParser: uiTarsJsonParser,\n chatCompletion: {\n unsupportedUserConfig: [\n 'reasoningEnabled',\n 
'reasoningEffort',\n 'reasoningBudget',\n ],\n buildChatCompletionParams: ({ midsceneDefaults, userConfig }) => ({\n config: {\n temperature: userConfig.temperature ?? midsceneDefaults.temperature,\n },\n }),\n },\n planning: {\n kind: 'custom',\n cacheEnabled: false,\n defaultReplanningCycleLimit: defaultVlmUiTarsReplanningCycleLimit,\n planFn: (userInstruction, options) =>\n uiTarsPlanning(userInstruction, options, uiTarsModelVersion),\n },\n locate: {\n resultAdapter: {\n coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },\n parseRawLocateValue: parseUiTarsRawLocateValue,\n },\n },\n };\n}\n\nconst uiTarsDoubao15Adapter = createUiTarsAdapter(\n UITarsModelVersion.DOUBAO_1_5_20B,\n);\n\nexport const uiTarsAdapters = {\n 'vlm-ui-tars': createUiTarsAdapter(UITarsModelVersion.V1_0),\n 'vlm-ui-tars-doubao': uiTarsDoubao15Adapter,\n 'vlm-ui-tars-doubao-1.5': uiTarsDoubao15Adapter,\n} satisfies Pick<\n Record<TModelFamily, ModelAdapterDefinition>,\n 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 
'vlm-ui-tars-doubao-1.5'\n>;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","defaultVlmUiTarsReplanningCycleLimit","normalizeJsonObject","context","Array","item","normalized","value","trimmedKey","preserveStringValue","normalizedValue","shouldRepairUiTarsLocateJson","source","preprocessUiTarsLocateJson","input","uiTarsJsonParser","raw","safeParseJson","firstError","jsonString","extractJSONFromCodeBlock","JSON","jsonrepair","error","Error","String","parseUiTarsRawLocateValue","bbox","unwrapCoordinateListLikeInput","assert","splitted","Number","bboxList","x","y","msg","createUiTarsAdapter","uiTarsModelVersion","midsceneDefaults","userConfig","userInstruction","options","uiTarsPlanning","uiTarsDoubao15Adapter","UITarsModelVersion","uiTarsAdapters"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACYA,MAAMI,uCAAuC;AAE7C,SAASC,oBACPJ,GAAQ,EACRK,UAA8D,CAAC,CAAC;IAEhE,IAAIL,QAAAA,KACF,OAAOA;IAGT,IAAIM,MAAM,OAAO,CAACN,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACO,OAASH,oBAAoBG,MAAMF;IAGrD,IAAI,AAAe,YAAf,OAAOL,KAAkB;QAC3B,MAAMQ,aAAkB,CAAC;QACzB,KAAK,MAAM,CAACV,KAAKW,MAAM,IAAIV,OAAO,OAAO,CAACC,KAAM;YAC9C,MAAMU,aAAaZ,IAAI,IAAI;YAC3B,MAAMa,sBACJN,QAAQ,uBAAuB,EAAE,SAASK,eAAe;YAC3D,MAAME,kBACJ,AAAiB,YAAjB,OAAOH,QACHE,sBACEF,QACAA,MAAM,IAAI,KACZL,oBAAoBK,OAAOJ;YACjCG,UAAU,CAACE,WAAW,GAAGE;QAC3B;QACA,OAAOJ;IACT;IAEA,OAAO,AAAe,YAAf,OAAOR,MAAmBA,IAAI,IAAI,KAAKA;AAChD;AAEA,SAASa,6BAA6BC,MAAwB;IAC5D,OACEA,AAAW,aAAXA,UACAA,AAAW,sBAAXA,UACAA,AAAW,4BAAXA;AAEJ;AAEA,SAASC,2BAA2BC,KAAa;IAC/C,IAAIA,MAAM,QAAQ,CAAC,SACj
B,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEA,MAAMC,mBAAyD,CAC7DC,KACAb,UAAU;IAAE,QAAQ;AAAiB,CAAC;IAEtC,MAAM,EAAES,MAAM,EAAE,GAAGT;IACnB,IAAI;QACF,OAAOc,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcD,KAAKb;IAC5B,EAAE,OAAOe,YAAY;QACnB,IAAI,CAACP,6BAA6BC,SAChC,MAAMM;QAGR,MAAMC,aAAaN,2BACjBO,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ;QAE3B,IAAI;YACF,OAAOd,oBAAoBmB,KAAK,KAAK,CAACC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWH,cAAchB;QACjE,EAAE,OAAOoB,OAAO;YACd,MAAMC,MACJ,CAAC,gDAAgD,EAAEC,OACjDF,SAASL,cAAc,iBACvB,gBAAgB,EAAEF,KAAK;QAE7B;IACF;AACF;AAKA,SAASU,0BAA0BZ,KAAc;IAC/C,MAAMa,OAAOC,AAAAA,IAAAA,yBAAAA,6BAAAA,AAAAA,EAA8Bd;IAC3C,IAAI,AAAgB,YAAhB,OAAOa,MAAmB;QAC5BE,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,+BAA+B,IAAI,CAACF,KAAK,IAAI,KAC7C,CAAC,2CAA2C,EAAEA,MAAM;QAEtD,MAAMG,WAAWH,KAAK,KAAK,CAAC;QAC5B,IAAIG,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,MAAM;YACN,aAAa;gBACXC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;aACnB;QACH;QAEF,MAAM,IAAIN,MAAM,CAAC,2CAA2C,EAAEG,MAAM;IACtE;IAEA,IAAIK,WAAqB,EAAE;IAC3B,IAAI5B,MAAM,OAAO,CAACuB,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACtB;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAAC4B,GAAGC,EAAE,GAAG7B,KAAK,KAAK,CAAC;YAC1B2B,SAAS,IAAI,CAACD,OAAOE,EAAE,IAAI,KAAKF,OAAOG,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAO7B,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAAC4B,GAAGC,EAAE,GAAG7B,KAAK,KAAK,CAAC;YAC1B2B,SAAS,IAAI,CAACD,OAAOE,EAAE,IAAI,KAAKF,OAAOG,EAAE,IAAI;QAC/C,OACEF,SAAS,IAAI,CAACD,OAAO1B;IAEzB;SAEA2B,WAAWL;IAGb,IAAIK,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACL,MAAM;QACN,aAAa;YAACA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IACnE;IAGF,IACEA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QAAE,MAAM;QAAS,aAAa;YAACA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IAAC;IAGlE,IAAIL,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QA
CL,MAAM;QACN,aAAa;YAACK,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IACnE;IAGF,MAAMG,MAAM,CAAC,oCAAoC,EAAEd,KAAK,SAAS,CAACM,MAAM,CAAC,CAAC;IAC1E,MAAM,IAAIH,MAAMW;AAClB;AAEA,SAASC,oBACPC,kBAAsC;IAEtC,OAAO;QACL,YAAYtB;QACZ,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;aACD;YACD,2BAA2B,CAAC,EAAEuB,gBAAgB,EAAEC,UAAU,EAAE,GAAM;oBAChE,QAAQ;wBACN,aAAaA,WAAW,WAAW,IAAID,iBAAiB,WAAW;oBACrE;gBACF;QACF;QACA,UAAU;YACR,MAAM;YACN,cAAc;YACd,6BAA6BrC;YAC7B,QAAQ,CAACuC,iBAAiBC,UACxBC,AAAAA,IAAAA,qCAAAA,cAAAA,AAAAA,EAAeF,iBAAiBC,SAASJ;QAC7C;QACA,QAAQ;YACN,eAAe;gBACb,aAAa;oBAAE,OAAO;oBAAQ,OAAO;oBAAM,cAAc;gBAAK;gBAC9D,qBAAqBX;YACvB;QACF;IACF;AACF;AAEA,MAAMiB,wBAAwBP,oBAC5BQ,oBAAAA,kBAAAA,CAAAA,cAAiC;AAG5B,MAAMC,iBAAiB;IAC5B,eAAeT,oBAAoBQ,oBAAAA,kBAAAA,CAAAA,IAAuB;IAC1D,sBAAsBD;IACtB,0BAA0BA;AAC5B"}
@@ -30,22 +30,33 @@ const logger_namespaceObject = require("@midscene/shared/logger");
30
30
  const us_keyboard_layout_namespaceObject = require("@midscene/shared/us-keyboard-layout");
31
31
  const utils_namespaceObject = require("@midscene/shared/utils");
32
32
  const action_parser_namespaceObject = require("@ui-tars/action-parser");
33
- const ui_tars_planning_js_namespaceObject = require("./prompt/ui-tars-planning.js");
34
- const index_js_namespaceObject = require("./service-caller/index.js");
33
+ const ui_tars_planning_js_namespaceObject = require("../../prompt/ui-tars-planning.js");
34
+ const index_js_namespaceObject = require("../../service-caller/index.js");
35
+ const bbox_js_namespaceObject = require("../../shared/model-locate-result/bbox.js");
36
+ const pixel_bbox_mapper_js_namespaceObject = require("../../shared/model-locate-result/pixel-bbox-mapper.js");
35
37
  const debug = (0, logger_namespaceObject.getDebug)('ui-tars-planning');
36
38
  const warnLog = (0, logger_namespaceObject.getDebug)('ui-tars-planning', {
37
39
  console: true
38
40
  });
39
- const bboxSize = 10;
40
- const pointToBbox = (point, width, height)=>[
41
- Math.round(Math.max(point.x - bboxSize / 2, 0)),
42
- Math.round(Math.max(point.y - bboxSize / 2, 0)),
43
- Math.round(Math.min(point.x + bboxSize / 2, width)),
44
- Math.round(Math.min(point.y + bboxSize / 2, height))
45
- ];
46
- async function uiTarsPlanning(userInstruction, options) {
47
- const { conversationHistory, context, modelConfig, actionContext } = options;
48
- const { uiTarsModelVersion } = modelConfig;
41
+ function pointToLocateParam(point, thought, size) {
42
+ const ctx = {
43
+ preparedSize: size
44
+ };
45
+ const pixelBbox = (0, pixel_bbox_mapper_js_namespaceObject.mapLocateResultToPixelBboxByCoordinates)({
46
+ type: 'point',
47
+ coordinates: point
48
+ }, ctx, {
49
+ shape: 'point',
50
+ order: 'xy',
51
+ normalizedBy: 1
52
+ });
53
+ return {
54
+ prompt: thought || '',
55
+ locatedPixelBbox: (0, bbox_js_namespaceObject.finalizePixelBbox)(pixelBbox, point, ctx)
56
+ };
57
+ }
58
+ async function uiTarsPlanning(userInstruction, options, uiTarsModelVersion) {
59
+ const { conversationHistory, context, modelRuntime, actionContext } = options;
49
60
  let instruction = userInstruction;
50
61
  if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstruction}</user_instruction>`;
51
62
  const systemPrompt = (0, ui_tars_planning_js_namespaceObject.getUiTarsPlanningPrompt)() + instruction;
@@ -67,7 +78,7 @@ async function uiTarsPlanning(userInstruction, options) {
67
78
  content: systemPrompt
68
79
  },
69
80
  ...conversationHistory.snapshot()
70
- ], modelConfig, {
81
+ ], modelRuntime, {
71
82
  abortSignal: options.abortSignal
72
83
  });
73
84
  let convertedText;
@@ -101,76 +112,46 @@ async function uiTarsPlanning(userInstruction, options) {
101
112
  const actionType = (action.action_type || '').toLowerCase();
102
113
  if ('click' === actionType) {
103
114
  (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
104
- const point = getPoint(action.action_inputs.start_box, shotSize);
105
- const locate = {
106
- prompt: action.thought || '',
107
- bbox: pointToBbox({
108
- x: point[0],
109
- y: point[1]
110
- }, shotSize.width, shotSize.height)
111
- };
115
+ const point = getPoint(action.action_inputs.start_box);
116
+ const locate = pointToLocateParam(point, action.thought, shotSize);
112
117
  transformActions.push({
113
118
  type: 'Tap',
114
119
  param: {
115
- locate: locate
120
+ locate
116
121
  }
117
122
  });
118
123
  } else if ('left_double' === actionType) {
119
124
  (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
120
- const point = getPoint(action.action_inputs.start_box, shotSize);
121
- const locate = {
122
- prompt: action.thought || '',
123
- bbox: pointToBbox({
124
- x: point[0],
125
- y: point[1]
126
- }, shotSize.width, shotSize.height)
127
- };
125
+ const point = getPoint(action.action_inputs.start_box);
126
+ const locate = pointToLocateParam(point, action.thought, shotSize);
128
127
  transformActions.push({
129
128
  type: 'DoubleClick',
130
129
  param: {
131
- locate: locate
130
+ locate
132
131
  },
133
132
  thought: action.thought || ''
134
133
  });
135
134
  } else if ('right_single' === actionType) {
136
135
  (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
137
- const point = getPoint(action.action_inputs.start_box, shotSize);
138
- const locate = {
139
- prompt: action.thought || '',
140
- bbox: pointToBbox({
141
- x: point[0],
142
- y: point[1]
143
- }, shotSize.width, shotSize.height)
144
- };
136
+ const point = getPoint(action.action_inputs.start_box);
137
+ const locate = pointToLocateParam(point, action.thought, shotSize);
145
138
  transformActions.push({
146
139
  type: 'RightClick',
147
140
  param: {
148
- locate: locate
141
+ locate
149
142
  },
150
143
  thought: action.thought || ''
151
144
  });
152
145
  } else if ('drag' === actionType) {
153
146
  (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
154
147
  (0, utils_namespaceObject.assert)(action.action_inputs.end_box, 'end_box is required');
155
- const startPoint = getPoint(action.action_inputs.start_box, shotSize);
156
- const endPoint = getPoint(action.action_inputs.end_box, shotSize);
148
+ const startPoint = getPoint(action.action_inputs.start_box);
149
+ const endPoint = getPoint(action.action_inputs.end_box);
157
150
  transformActions.push({
158
151
  type: 'DragAndDrop',
159
152
  param: {
160
- from: {
161
- prompt: action.thought || '',
162
- bbox: pointToBbox({
163
- x: startPoint[0],
164
- y: startPoint[1]
165
- }, shotSize.width, shotSize.height)
166
- },
167
- to: {
168
- prompt: action.thought || '',
169
- bbox: pointToBbox({
170
- x: endPoint[0],
171
- y: endPoint[1]
172
- }, shotSize.width, shotSize.height)
173
- }
153
+ from: pointToLocateParam(startPoint, action.thought, shotSize),
154
+ to: pointToLocateParam(endPoint, action.thought, shotSize)
174
155
  },
175
156
  thought: action.thought || ''
176
157
  });
@@ -193,7 +174,7 @@ async function uiTarsPlanning(userInstruction, options) {
193
174
  transformActions.push({
194
175
  type: 'Finished',
195
176
  param: {},
196
- thought: action.thought || ''
177
+ thought: action.action_inputs.content || action.thought || ''
197
178
  });
198
179
  } else if ('hotkey' === actionType) if (action.action_inputs.key) {
199
180
  const keys = (0, us_keyboard_layout_namespaceObject.transformHotkeyInput)(action.action_inputs.key);
@@ -262,14 +243,15 @@ function convertBboxToCoordinates(text) {
262
243
  const y = Math.floor((y1Num + y2Num) / 2);
263
244
  return `(${x},${y})`;
264
245
  }
265
- const cleanedText = text.replace(/\[EOS\]/g, '');
246
+ const cleanedText = text.replace(/\[EOS\]/g, '').replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');
266
247
  return cleanedText.replace(pattern, replaceMatch).trim();
267
248
  }
268
- function getPoint(startBox, size) {
249
+ function getPoint(startBox) {
269
250
  const [x, y] = JSON.parse(startBox);
251
+ (0, utils_namespaceObject.assert)('number' == typeof x && Number.isFinite(x) && 'number' == typeof y && Number.isFinite(y), `invalid point data for ui-tars planning: ${startBox}`);
270
252
  return [
271
- x * size.width,
272
- y * size.height
253
+ x,
254
+ y
273
255
  ];
274
256
  }
275
257
  exports.uiTarsPlanning = __webpack_exports__.uiTarsPlanning;
@@ -280,4 +262,4 @@ Object.defineProperty(exports, '__esModule', {
280
262
  value: true
281
263
  });
282
264
 
283
- //# sourceMappingURL=ui-tars-planning.js.map
265
+ //# sourceMappingURL=planning.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/ui-tars/planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/ui-tars/planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n Size,\n} from '@/types';\nimport type { UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport {\n getSummary,\n getUiTarsPlanningPrompt,\n} from '../../prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../../service-caller/index';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\nimport type { PlanOptions } from '../../workflows/planning/types';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst warnLog = 
getDebug('ui-tars-planning', { console: true });\n\nfunction pointToLocateParam(\n point: [number, number],\n thought: string | null,\n size: Size,\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: point },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: 1 },\n );\n\n return {\n prompt: thought || '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, point, ctx),\n };\n}\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: PlanOptions,\n uiTarsModelVersion: UITarsModelVersion,\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelRuntime, actionContext } = options;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { shotSize } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: shotSize.width,\n height: shotSize.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError 
instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { shotSize } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box);\n\n const locate = pointToLocateParam(point, action.thought, shotSize);\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box);\n const endPoint = getPoint(action.action_inputs.end_box);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: pointToLocateParam(startPoint, 
action.thought, shotSize),\n to: pointToLocateParam(endPoint, action.thought, shotSize),\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.action_inputs.content || action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n warnLog('No key found in action: hotkey. Will not perform action.');\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if 
(unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove common model wrappers before handing the response to UI-TARS parser.\n const cleanedText = text\n .replace(/\\[EOS\\]/g, '')\n .replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');\n return cleanedText.replace(pattern, 
replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string): [number, number] {\n const [x, y] = JSON.parse(startBox);\n assert(\n typeof x === 'number' &&\n Number.isFinite(x) &&\n typeof y === 'number' &&\n Number.isFinite(y),\n `invalid point data for ui-tars planning: ${startBox}`,\n );\n return [x, y];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: {\n content?: string;\n };\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | 
WaitAction;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","pointToLocateParam","point","thought","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","uiTarsPlanning","userInstruction","options","uiTarsModelVersion","conversationHistory","context","modelRuntime","actionContext","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","shotSize","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","Math","y","cleanedText","startBox"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;AC4BA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,oBAAoB;IAAE,SAAS;AAAK;AAE7D,SAASE,mBACPC,KAAuB,EACvBC,OAAsB,EACtBC,IAAU;IAEV,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,AAAAA,IAAAA,qCAAAA,uCAAAA,AAAAA,EAChB;QAAE,MAAM;QAAS,aAAaL;IAAM,GACpCG,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAc;IAAE;IAGjD,OAAO;QACL,QAAQF,WAAW;QACnB,kBAAkBK,AAAAA,IAAAA,wBAAAA,iBAAAA,AAAAA,EAAkBF,WAAWJ,OAAOG;IACxD;AACF;AAEO,eAAeI,eACpBC,eAAuB,EACvBC,O
AAoB,EACpBC,kBAAsC;IAEtC,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,YAAY,EAAEC,aAAa,EAAE,GAAGL;IAEtE,IAAIM,cAAcP;IAClB,IAAIM,eACFC,cAAc,CAAC,yBAAyB,EAAED,cAAc,8CAA8C,EAAEN,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,MAA4BF;IAEjD,MAAMG,mBAAmBN,QAAQ,UAAU,CAAC,MAAM;IAElDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKO;gBACP;YACF;SACD;IACH;IAEA,MAAMC,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAChB;QACE;YACE,MAAM;YACN,SAASJ;QACX;WACGL,oBAAoB,QAAQ;KAChC,EACDE,cACA;QACE,aAAaJ,QAAQ,WAAW;IAClC;IAGF,IAAIY;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,QAAQ,EAAE,GAAGZ;QACrB,MAAMa,cAAcC,AAAAA,IAAAA,8BAAAA,YAAAA,AAAAA,EAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,SAAS,KAAK;gBACrB,QAAQA,SAAS,MAAM;YACzB;YACA,UAAUd;QACZ;QACAY,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,QAAQ,EAAE,GAAGZ;IAErBhB,MACE,oBACAc,oBACA,YACAsB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEzDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMrC,QAAQwC,SAASH,OAAO,aAAa,CAAC,SAAS;YAErD,MAAMI,SAAS1C,mBAAmBC,OAAOqC,OAAO,OAAO,EAAEb;YAEz
DU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACLO;gBACF;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS;YAC1D,MAAMM,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO;YACtDH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAMnC,mBAAmB2C,YAAYL,OAAO,OAAO,EAAEb;oBACrD,IAAIzB,mBAAmB4C,UAAUN,OAAO,OAAO,EAAEb;gBACnD;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,aAAa,CAAC,OAAO,IAAIA,OAAO,OAAO,IAAI;YAC7D;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAEtB;YACL,MAAMO,OAAOC,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAXEvC,QAAQ;aAYL,IAAIwC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAzC,MAAM,0BAA0B0C,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMY,eAAyB,EAAE;QAGjC,IAAIxB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBwB,aAAa,IAAI,CAAC;YAGlB,IACE3B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB2B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIX,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMY,QAAQZ,iBAAiB,GAAG,CAAC,CAACa,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMnB,eAAe;YACnB;eACGkB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIf,yBAAAA,oBAAoBA,CAC5BH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAA
Ec,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEAvB,MAAM,oBAAoBoC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMe,MAAMC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAW/B,IAAI,OAAO;IAElCR,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASsC;IACX;IAEA,OAAO;QACL,SAASf;QACTe;QACA,OAAO9B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB4B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIC,KAAK,KAAK,CAAEN,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMI,IAAID,KAAK,KAAK,CAAEJ,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEE,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAchB,KACjB,OAAO,CAAC,YAAY,IACpB,OAAO,CAAC,2BAA2B;IACtC,OAAOgB,YAAY,OAAO,CAACf,SAASC,cAAc,IAAI;AACxD;AAEA,SAASb,SAAS4B,QAAgB;IAChC,MAAM,CAACJ,GAAGE,EAAE,GAAGlC,KAAK,KAAK,CAACoC;IAC1B7B,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAa,YAAb,OAAOyB,KACLJ,OAAO,QAAQ,CAACI,MAChB,AAAa,YAAb,OAAOE,KACPN,OAAO,QAAQ,CAACM,IAClB,CAAC,yCAAyC,EAAEE,UAAU;IAExD,OAAO;QAACJ;QAAGE;KAAE;AACf"}
@@ -30,7 +30,7 @@ __webpack_require__.d(__webpack_exports__, {
30
30
  extractDataQueryPrompt: ()=>extractDataQueryPrompt
31
31
  });
32
32
  const env_namespaceObject = require("@midscene/shared/env");
33
- const index_js_namespaceObject = require("../service-caller/index.js");
33
+ const json_js_namespaceObject = require("../service-caller/json.js");
34
34
  const external_util_js_namespaceObject = require("./util.js");
35
35
  function buildTypeQueryDemandValue(type, demand) {
36
36
  const currentScreenshotConstraint = 'based on the current screenshot and its contents if provided, unless the user explicitly asks to compare with reference images';
@@ -45,13 +45,13 @@ function parseXMLExtractionResponse(xmlString) {
45
45
  if (!dataJsonStr) throw new Error('Missing required field: data-json');
46
46
  let data;
47
47
  try {
48
- data = (0, index_js_namespaceObject.parseModelResponseJson)(dataJsonStr, void 0);
48
+ data = (0, json_js_namespaceObject.safeParseJson)(dataJsonStr);
49
49
  } catch (e) {
50
50
  throw new Error(`Failed to parse data-json: ${e}`);
51
51
  }
52
52
  let errors;
53
53
  if (errorsStr) try {
54
- const parsedErrors = (0, index_js_namespaceObject.parseModelResponseJson)(errorsStr, void 0);
54
+ const parsedErrors = (0, json_js_namespaceObject.safeParseJson)(errorsStr);
55
55
  if (Array.isArray(parsedErrors)) errors = parsedErrors;
56
56
  } catch (e) {}
57
57
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/extraction.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/extraction.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { AIDataExtractionResponse, ServiceExtractParam } from '@/types';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport { parseModelResponseJson } from '../service-caller/index';\nimport { extractXMLTag } from './util';\n\nexport function buildTypeQueryDemandValue(\n type: 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n) {\n const currentScreenshotConstraint =\n 'based on the current screenshot and its contents if provided, unless the user explicitly asks to compare with reference images';\n\n if (type === 'Assert') {\n return `Boolean, ${currentScreenshotConstraint}, whether the following statement is true: ${demand}`;\n }\n\n if (type === 'WaitFor') {\n return `Boolean, the user wants to do some 'wait for' operation. 
${currentScreenshotConstraint}, please check whether the following statement is true: ${demand}`;\n }\n\n return `${type}, ${currentScreenshotConstraint}, ${demand}`;\n}\n\n/**\n * Parse XML response from LLM and convert to AIDataExtractionResponse\n */\nexport function parseXMLExtractionResponse<T>(\n xmlString: string,\n): AIDataExtractionResponse<T> {\n const thought = extractXMLTag(xmlString, 'thought');\n const dataJsonStr = extractXMLTag(xmlString, 'data-json');\n const errorsStr = extractXMLTag(xmlString, 'errors');\n\n // Parse data-json (required)\n if (!dataJsonStr) {\n throw new Error('Missing required field: data-json');\n }\n\n let data: T;\n try {\n data = parseModelResponseJson(dataJsonStr, undefined) as T;\n } catch (e) {\n throw new Error(`Failed to parse data-json: ${e}`);\n }\n\n // Parse errors (optional)\n let errors: string[] | undefined;\n if (errorsStr) {\n try {\n const parsedErrors = parseModelResponseJson(errorsStr, undefined);\n if (Array.isArray(parsedErrors)) {\n errors = parsedErrors;\n }\n } catch (e) {\n // If errors parsing fails, just ignore it\n }\n }\n\n return {\n ...(thought ? { thought } : {}),\n data,\n ...(errors && errors.length > 0 ? { errors } : {}),\n };\n}\n\nexport function systemPromptToExtract(options?: {\n screenshotIncluded?: boolean;\n referenceImagesIncluded?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n const screenshotIncluded = options?.screenshotIncluded ?? true;\n const referenceImagesIncluded = options?.referenceImagesIncluded ?? false;\n\n const contextPrompts = [\n \"The user will give you data requirements in <DATA_DEMAND>. You need to understand the user's requirements and extract the data satisfying the <DATA_DEMAND>.\",\n ];\n\n if (screenshotIncluded) {\n contextPrompts.push(\n 'The user will provide a current screenshot to evaluate, and may provide its contents. Base your answer on the current screenshot and its contents when provided. 
Treat them as the primary source of truth for what is currently visible or true.',\n );\n } else {\n contextPrompts.push(\n 'The user will not provide a current screenshot. Use only the supplied page contents and other inputs, and do not infer unsupported visual details.',\n );\n }\n\n if (referenceImagesIncluded) {\n const referenceImagesPrompt =\n 'Reference images are supporting context only unless <DATA_DEMAND> explicitly asks for comparison, matching, or reasoning about them.';\n contextPrompts.push(\n screenshotIncluded\n ? `${referenceImagesPrompt} Do not conclude that something exists in the current screenshot solely because it appears in a reference image; when they conflict, trust the current screenshot and its contents.`\n : `${referenceImagesPrompt} Do not treat reference images as direct evidence of the current state unless the demand explicitly asks you to use them that way.`,\n );\n }\n const contextPrompt = contextPrompts.join('\\n\\n');\n\n return `\nYou are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.\n\n${contextPrompt}\n\nIf a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.\n\nWhen DATA_DEMAND is a JSON object, the keys in your response must exactly match the keys in DATA_DEMAND. Do not rename, translate, or substitute any key.\n\n\nReturn in the following XML format:\n<thought>the thinking process of the extraction, less than 300 words. Use ${preferredLanguage} in this field.</thought>\n<data-json>the extracted data as JSON. Make sure both the value and scheme meet the DATA_DEMAND. 
If you want to write some description in this field, use the same language as the DATA_DEMAND.</data-json>\n<errors>optional error messages as JSON array, e.g., [\"error1\", \"error2\"]</errors>\n\n# Example 1\nFor example, if the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"name\": \"name shows on the left panel, string\",\n \"age\": \"age shows on the right panel, number\",\n \"isAdmin\": \"if the user is admin, boolean\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n{\n \"name\": \"John\",\n \"age\": 30,\n \"isAdmin\": true\n}\n</data-json>\n\n# Example 2\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe todo items list, string[]\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n[\"todo 1\", \"todo 2\", \"todo 3\"]\n</data-json>\n\n# Example 3\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe page title, string\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n\"todo list\"\n</data-json>\n\n# Example 4\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"StatementIsTruthy\": \"Boolean, is it currently the SMS page?\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n{ \"StatementIsTruthy\": true }\n</data-json>\n`;\n}\n\nexport const extractDataQueryPrompt = (\n pageDescription: string,\n dataQuery: string | Record<string, string>,\n) => {\n let dataQueryText = '';\n if (typeof dataQuery === 'string') {\n dataQueryText = dataQuery;\n } else {\n dataQueryText = JSON.stringify(dataQuery, null, 2);\n }\n\n return 
`\n<PageDescription>\n${pageDescription}\n</PageDescription>\n\n<DATA_DEMAND>\n${dataQueryText}\n</DATA_DEMAND>\n `;\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","buildTypeQueryDemandValue","type","demand","currentScreenshotConstraint","parseXMLExtractionResponse","xmlString","thought","extractXMLTag","dataJsonStr","errorsStr","Error","data","parseModelResponseJson","undefined","e","errors","parsedErrors","Array","systemPromptToExtract","options","preferredLanguage","getPreferredLanguage","screenshotIncluded","referenceImagesIncluded","contextPrompts","referenceImagesPrompt","contextPrompt","extractDataQueryPrompt","pageDescription","dataQuery","dataQueryText","JSON"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACDO,SAASI,0BACdC,IAA4D,EAC5DC,MAA2B;IAE3B,MAAMC,8BACJ;IAEF,IAAIF,AAAS,aAATA,MACF,OAAO,CAAC,SAAS,EAAEE,4BAA4B,2CAA2C,EAAED,QAAQ;IAGtG,IAAID,AAAS,cAATA,MACF,OAAO,CAAC,yDAAyD,EAAEE,4BAA4B,wDAAwD,EAAED,QAAQ;IAGnK,OAAO,GAAGD,KAAK,EAAE,EAAEE,4BAA4B,EAAE,EAAED,QAAQ;AAC7D;AAKO,SAASE,2BACdC,SAAiB;IAEjB,MAAMC,UAAUC,AAAAA,IAAAA,iCAAAA,aAAAA,AAAAA,EAAcF,WAAW;IACzC,MAAMG,cAAcD,AAAAA,IAAAA,iCAAAA,aAAAA,AAAAA,EAAcF,WAAW;IAC7C,MAAMI,YAAYF,AAAAA,IAAAA,iCAAAA,aAAAA,AAAAA,EAAcF,WAAW;IAG3C,IAAI,CAACG,aACH,MAAM,IAAIE,MAAM;IAGlB,IAAIC;IACJ,IAAI;QACFA,OAAOC,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EAAuBJ,aAAaK;IAC7C,EAAE,OAAOC,GAAG;QACV,MAAM,IAAIJ,MAAM,CAAC,2BAA2B,EAAEI,GAAG;IACnD;IAGA,IAAIC;IACJ,IAAIN,WACF,IAAI;QACF,MAAMO,eAAeJ,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EAAuBH,WAAWI;QACvD,IAAII,MAAM,OA
AO,CAACD,eAChBD,SAASC;IAEb,EAAE,OAAOF,GAAG,CAEZ;IAGF,OAAO;QACL,GAAIR,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9BK;QACA,GAAII,UAAUA,OAAO,MAAM,GAAG,IAAI;YAAEA;QAAO,IAAI,CAAC,CAAC;IACnD;AACF;AAEO,SAASG,sBAAsBC,OAGrC;IACC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAC1B,MAAMC,qBAAqBH,SAAS,sBAAsB;IAC1D,MAAMI,0BAA0BJ,SAAS,2BAA2B;IAEpE,MAAMK,iBAAiB;QACrB;KACD;IAED,IAAIF,oBACFE,eAAe,IAAI,CACjB;SAGFA,eAAe,IAAI,CACjB;IAIJ,IAAID,yBAAyB;QAC3B,MAAME,wBACJ;QACFD,eAAe,IAAI,CACjBF,qBACI,GAAGG,sBAAsB,mLAAmL,CAAC,GAC7M,GAAGA,sBAAsB,kIAAkI,CAAC;IAEpK;IACA,MAAMC,gBAAgBF,eAAe,IAAI,CAAC;IAE1C,OAAO,CAAC;;;AAGV,EAAEE,cAAc;;;;;;;;0EAQ0D,EAAEN,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAqE9F,CAAC;AACD;AAEO,MAAMO,yBAAyB,CACpCC,iBACAC;IAEA,IAAIC,gBAAgB;IAElBA,gBADE,AAAqB,YAArB,OAAOD,YACOA,YAEAE,KAAK,SAAS,CAACF,WAAW,MAAM;IAGlD,OAAO,CAAC;;AAEV,EAAED,gBAAgB;;;;AAIlB,EAAEE,cAAc;;EAEd,CAAC;AACH"}
1
+ {"version":3,"file":"ai-model/prompt/extraction.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/extraction.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { AIDataExtractionResponse, ServiceExtractParam } from '@/types';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport { safeParseJson } from '../service-caller/json';\nimport { extractXMLTag } from './util';\n\nexport function buildTypeQueryDemandValue(\n type: 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n) {\n const currentScreenshotConstraint =\n 'based on the current screenshot and its contents if provided, unless the user explicitly asks to compare with reference images';\n\n if (type === 'Assert') {\n return `Boolean, ${currentScreenshotConstraint}, whether the following statement is true: ${demand}`;\n }\n\n if (type === 'WaitFor') {\n return `Boolean, the user wants to do some 'wait for' operation. 
${currentScreenshotConstraint}, please check whether the following statement is true: ${demand}`;\n }\n\n return `${type}, ${currentScreenshotConstraint}, ${demand}`;\n}\n\n/**\n * Parse XML response from LLM and convert to AIDataExtractionResponse\n */\nexport function parseXMLExtractionResponse<T>(\n xmlString: string,\n): AIDataExtractionResponse<T> {\n const thought = extractXMLTag(xmlString, 'thought');\n const dataJsonStr = extractXMLTag(xmlString, 'data-json');\n const errorsStr = extractXMLTag(xmlString, 'errors');\n\n // Parse data-json (required)\n if (!dataJsonStr) {\n throw new Error('Missing required field: data-json');\n }\n\n let data: T;\n try {\n data = safeParseJson(dataJsonStr) as T;\n } catch (e) {\n throw new Error(`Failed to parse data-json: ${e}`);\n }\n\n // Parse errors (optional)\n let errors: string[] | undefined;\n if (errorsStr) {\n try {\n const parsedErrors = safeParseJson(errorsStr);\n if (Array.isArray(parsedErrors)) {\n errors = parsedErrors;\n }\n } catch (e) {\n // If errors parsing fails, just ignore it\n }\n }\n\n return {\n ...(thought ? { thought } : {}),\n data,\n ...(errors && errors.length > 0 ? { errors } : {}),\n };\n}\n\nexport function systemPromptToExtract(options?: {\n screenshotIncluded?: boolean;\n referenceImagesIncluded?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n const screenshotIncluded = options?.screenshotIncluded ?? true;\n const referenceImagesIncluded = options?.referenceImagesIncluded ?? false;\n\n const contextPrompts = [\n \"The user will give you data requirements in <DATA_DEMAND>. You need to understand the user's requirements and extract the data satisfying the <DATA_DEMAND>.\",\n ];\n\n if (screenshotIncluded) {\n contextPrompts.push(\n 'The user will provide a current screenshot to evaluate, and may provide its contents. Base your answer on the current screenshot and its contents when provided. 
Treat them as the primary source of truth for what is currently visible or true.',\n );\n } else {\n contextPrompts.push(\n 'The user will not provide a current screenshot. Use only the supplied page contents and other inputs, and do not infer unsupported visual details.',\n );\n }\n\n if (referenceImagesIncluded) {\n const referenceImagesPrompt =\n 'Reference images are supporting context only unless <DATA_DEMAND> explicitly asks for comparison, matching, or reasoning about them.';\n contextPrompts.push(\n screenshotIncluded\n ? `${referenceImagesPrompt} Do not conclude that something exists in the current screenshot solely because it appears in a reference image; when they conflict, trust the current screenshot and its contents.`\n : `${referenceImagesPrompt} Do not treat reference images as direct evidence of the current state unless the demand explicitly asks you to use them that way.`,\n );\n }\n const contextPrompt = contextPrompts.join('\\n\\n');\n\n return `\nYou are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.\n\n${contextPrompt}\n\nIf a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.\n\nWhen DATA_DEMAND is a JSON object, the keys in your response must exactly match the keys in DATA_DEMAND. Do not rename, translate, or substitute any key.\n\n\nReturn in the following XML format:\n<thought>the thinking process of the extraction, less than 300 words. Use ${preferredLanguage} in this field.</thought>\n<data-json>the extracted data as JSON. Make sure both the value and scheme meet the DATA_DEMAND. 
If you want to write some description in this field, use the same language as the DATA_DEMAND.</data-json>\n<errors>optional error messages as JSON array, e.g., [\"error1\", \"error2\"]</errors>\n\n# Example 1\nFor example, if the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"name\": \"name shows on the left panel, string\",\n \"age\": \"age shows on the right panel, number\",\n \"isAdmin\": \"if the user is admin, boolean\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n{\n \"name\": \"John\",\n \"age\": 30,\n \"isAdmin\": true\n}\n</data-json>\n\n# Example 2\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe todo items list, string[]\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n[\"todo 1\", \"todo 2\", \"todo 3\"]\n</data-json>\n\n# Example 3\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe page title, string\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n\"todo list\"\n</data-json>\n\n# Example 4\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"StatementIsTruthy\": \"Boolean, is it currently the SMS page?\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n<thought>According to the screenshot, i can see ...</thought>\n<data-json>\n{ \"StatementIsTruthy\": true }\n</data-json>\n`;\n}\n\nexport const extractDataQueryPrompt = (\n pageDescription: string,\n dataQuery: string | Record<string, string>,\n) => {\n let dataQueryText = '';\n if (typeof dataQuery === 'string') {\n dataQueryText = dataQuery;\n } else {\n dataQueryText = JSON.stringify(dataQuery, null, 2);\n }\n\n return 
`\n<PageDescription>\n${pageDescription}\n</PageDescription>\n\n<DATA_DEMAND>\n${dataQueryText}\n</DATA_DEMAND>\n `;\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","buildTypeQueryDemandValue","type","demand","currentScreenshotConstraint","parseXMLExtractionResponse","xmlString","thought","extractXMLTag","dataJsonStr","errorsStr","Error","data","safeParseJson","e","errors","parsedErrors","Array","systemPromptToExtract","options","preferredLanguage","getPreferredLanguage","screenshotIncluded","referenceImagesIncluded","contextPrompts","referenceImagesPrompt","contextPrompt","extractDataQueryPrompt","pageDescription","dataQuery","dataQueryText","JSON"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACDO,SAASI,0BACdC,IAA4D,EAC5DC,MAA2B;IAE3B,MAAMC,8BACJ;IAEF,IAAIF,AAAS,aAATA,MACF,OAAO,CAAC,SAAS,EAAEE,4BAA4B,2CAA2C,EAAED,QAAQ;IAGtG,IAAID,AAAS,cAATA,MACF,OAAO,CAAC,yDAAyD,EAAEE,4BAA4B,wDAAwD,EAAED,QAAQ;IAGnK,OAAO,GAAGD,KAAK,EAAE,EAAEE,4BAA4B,EAAE,EAAED,QAAQ;AAC7D;AAKO,SAASE,2BACdC,SAAiB;IAEjB,MAAMC,UAAUC,AAAAA,IAAAA,iCAAAA,aAAAA,AAAAA,EAAcF,WAAW;IACzC,MAAMG,cAAcD,AAAAA,IAAAA,iCAAAA,aAAAA,AAAAA,EAAcF,WAAW;IAC7C,MAAMI,YAAYF,AAAAA,IAAAA,iCAAAA,aAAAA,AAAAA,EAAcF,WAAW;IAG3C,IAAI,CAACG,aACH,MAAM,IAAIE,MAAM;IAGlB,IAAIC;IACJ,IAAI;QACFA,OAAOC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcJ;IACvB,EAAE,OAAOK,GAAG;QACV,MAAM,IAAIH,MAAM,CAAC,2BAA2B,EAAEG,GAAG;IACnD;IAGA,IAAIC;IACJ,IAAIL,WACF,IAAI;QACF,MAAMM,eAAeH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH;QACnC,IAAIO,MAAM,OAAO,CAACD,eAChBD,SAASC;IAEb,EAAE,OAAOF
,GAAG,CAEZ;IAGF,OAAO;QACL,GAAIP,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9BK;QACA,GAAIG,UAAUA,OAAO,MAAM,GAAG,IAAI;YAAEA;QAAO,IAAI,CAAC,CAAC;IACnD;AACF;AAEO,SAASG,sBAAsBC,OAGrC;IACC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAC1B,MAAMC,qBAAqBH,SAAS,sBAAsB;IAC1D,MAAMI,0BAA0BJ,SAAS,2BAA2B;IAEpE,MAAMK,iBAAiB;QACrB;KACD;IAED,IAAIF,oBACFE,eAAe,IAAI,CACjB;SAGFA,eAAe,IAAI,CACjB;IAIJ,IAAID,yBAAyB;QAC3B,MAAME,wBACJ;QACFD,eAAe,IAAI,CACjBF,qBACI,GAAGG,sBAAsB,mLAAmL,CAAC,GAC7M,GAAGA,sBAAsB,kIAAkI,CAAC;IAEpK;IACA,MAAMC,gBAAgBF,eAAe,IAAI,CAAC;IAE1C,OAAO,CAAC;;;AAGV,EAAEE,cAAc;;;;;;;;0EAQ0D,EAAEN,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAqE9F,CAAC;AACD;AAEO,MAAMO,yBAAyB,CACpCC,iBACAC;IAEA,IAAIC,gBAAgB;IAElBA,gBADE,AAAqB,YAArB,OAAOD,YACOA,YAEAE,KAAK,SAAS,CAACF,WAAW,MAAM;IAGlD,OAAO,CAAC;;AAEV,EAAED,gBAAgB;;;;AAIlB,EAAEE,cAAc;;EAEd,CAAC;AACH"}
@@ -28,10 +28,13 @@ __webpack_require__.d(__webpack_exports__, {
28
28
  findElementPrompt: ()=>findElementPrompt
29
29
  });
30
30
  const env_namespaceObject = require("@midscene/shared/env");
31
- const external_common_js_namespaceObject = require("./common.js");
32
- function systemPromptToLocateElement(modelFamily) {
31
+ const external_locate_grounding_rules_js_namespaceObject = require("./locate-grounding-rules.js");
32
+ const external_locate_param_example_js_namespaceObject = require("./locate-param-example.js");
33
+ function systemPromptToLocateElement(promptSpec) {
33
34
  const preferredLanguage = (0, env_namespaceObject.getPreferredLanguage)();
34
- const bboxComment = (0, external_common_js_namespaceObject.bboxDescription)(modelFamily);
35
+ const resultKey = promptSpec.resultKey;
36
+ const exampleValueText = (0, external_locate_param_example_js_namespaceObject.formatLocateExampleValue)(promptSpec.exampleValues[0]);
37
+ const resultFieldDescription = `the ${promptSpec.resultNoun} of the element that matches the user's description`;
35
38
  return `
36
39
  ## Role:
37
40
  You are an AI assistant that helps identify UI elements.
@@ -40,27 +43,24 @@ You are an AI assistant that helps identify UI elements.
40
43
  - Identify elements in screenshots that match the user's description.
41
44
  - Provide the coordinates of the element that matches the user's description.
42
45
 
43
- ## Important Notes for Locating Elements:
44
- - When the user describes an element that contains text (such as buttons, input fields, dropdown options, radio buttons, etc.), you should locate ONLY the text region of that element, not the entire element boundary.
45
- - For example: If an input field is large (both wide and tall) with a placeholder text "Please enter your comment", you should locate only the area where the placeholder text appears, not the entire input field.
46
- - This principle applies to all text-containing elements: focus on the visible text region rather than the full element container.
46
+ ${(0, external_locate_grounding_rules_js_namespaceObject.locateGroundingRules)()}
47
47
 
48
48
  ## Output Format:
49
49
  \`\`\`json
50
50
  {
51
- "bbox": [number, number, number, number], // ${bboxComment}
51
+ "${resultKey}": ${promptSpec.resultValueSchema}, // ${promptSpec.resultValueDescription}
52
52
  "errors"?: string[]
53
53
  }
54
54
  \`\`\`
55
55
 
56
56
  Fields:
57
- * \`bbox\` is the bounding box of the element that matches the user's description
57
+ * \`${resultKey}\` is ${resultFieldDescription}
58
58
  * \`errors\` is an optional array of error messages (if any)
59
59
 
60
60
  For example, when an element is found:
61
61
  \`\`\`json
62
62
  {
63
- "bbox": [100, 100, 200, 200],
63
+ "${resultKey}": ${exampleValueText},
64
64
  "errors": []
65
65
  }
66
66
  \`\`\`
@@ -68,7 +68,7 @@ For example, when an element is found:
68
68
  When no element is found:
69
69
  \`\`\`json
70
70
  {
71
- "bbox": [],
71
+ "${resultKey}": [],
72
72
  "errors": ["I can see ..., but {some element} is not found. Use ${preferredLanguage}."]
73
73
  }
74
74
  \`\`\`
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-locator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(\n modelFamily: TModelFamily | undefined,\n) {\n const preferredLanguage = getPreferredLanguage();\n const bboxComment = bboxDescription(modelFamily);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Important Notes for Locating Elements:\n- When the user describes an element that contains text (such as buttons, input fields, dropdown options, radio buttons, etc.), you should locate ONLY the text region of that element, not the entire element boundary.\n- For example: If an input field is large (both wide and tall) with a placeholder text \"Please enter your comment\", you should locate only the area where the placeholder text appears, not the entire input field.\n- This 
principle applies to all text-containing elements: focus on the visible text region rather than the full element container.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found. Use ${preferredLanguage}.\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = (targetElementDescription: string) =>\n `Find: ${targetElementDescription}`;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","systemPromptToLocateElement","modelFamily","preferredLanguage","getPreferredLanguage","bboxComment","bboxDescription","findElementPrompt","targetElementDescription"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACHO,SAASI,4BACdC,WAAqC;IAErC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAC1B,MAAMC,cAAcC,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBJ;IACpC,OAAO,CAAC;;;;;;;;;;;;;;;;gDAgBsC,EAAEG,YAAY;;;;;;;;;;;;;;;;;;;;;kEAqBI,EAAEF,kBAAkB;;;AAGtF,CAAC;AACD;AAEO,MAAMI,oBAAoB,CAACC,2BAChC,CAAC,MAAM,EAAEA,0BAA0B"}
1
+ {"version":3,"file":"ai-model/prompt/llm-locator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { getPreferredLanguage } from '@midscene/shared/env';\nimport type { LocateResultPromptSpec } from '../shared/model-locate-result';\nimport { locateGroundingRules } from './locate-grounding-rules';\nimport { formatLocateExampleValue } from './locate-param-example';\n\nexport function systemPromptToLocateElement(\n promptSpec: LocateResultPromptSpec,\n) {\n const preferredLanguage = getPreferredLanguage();\n const resultKey = promptSpec.resultKey;\n const exampleValueText = formatLocateExampleValue(\n promptSpec.exampleValues[0],\n );\n const resultFieldDescription = `the ${promptSpec.resultNoun} of the element that matches the user's description`;\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n${locateGroundingRules()}\n\n## Output Format:\n\\`\\`\\`json\n{\n \"${resultKey}\": ${promptSpec.resultValueSchema}, // ${promptSpec.resultValueDescription}\n \"errors\"?: 
string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`${resultKey}\\` is ${resultFieldDescription}\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"${resultKey}\": ${exampleValueText},\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"${resultKey}\": [],\n \"errors\": [\"I can see ..., but {some element} is not found. Use ${preferredLanguage}.\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = (targetElementDescription: string) =>\n `Find: ${targetElementDescription}`;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","systemPromptToLocateElement","promptSpec","preferredLanguage","getPreferredLanguage","resultKey","exampleValueText","formatLocateExampleValue","resultFieldDescription","locateGroundingRules","findElementPrompt","targetElementDescription"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACDO,SAASI,4BACdC,UAAkC;IAElC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAC1B,MAAMC,YAAYH,WAAW,SAAS;IACtC,MAAMI,mBAAmBC,AAAAA,IAAAA,iDAAAA,wBAAAA,AAAAA,EACvBL,WAAW,aAAa,CAAC,EAAE;IAE7B,MAAMM,yBAAyB,CAAC,IAAI,EAAEN,WAAW,UAAU,CAAC,mDAAmD,CAAC;IAChH,OAAO,CAAC;;;;;;;;AAQV,EAAEO,AAAAA,IAAAA,mDAAAA,oBAAAA,AAAAA,IAAuB;;;;;GAKtB,EAAEJ,UAAU,GAAG,EAAEH,WAAW,iBAAiB,CAAC,MAAM,EAAEA,WAAW,sBAAsB,CAAC;;;;;;IAMvF,EAAEG,UAAU,MAAM,EAAEG,uBAAuB;;;;;;GAM5C,EAAEH,UAAU,GAAG,EAAEC,iBAAiB;;;;;;;;GAQlC,EAAED,UAAU;kEACmD,EAAEF,kBAAkB;;;AAGtF,CAAC;AACD;AAEO,MAAMO,oBAAoB,CAACC,2BAChC,CAAC,MAAM,EAAEA,0BAA0B"
}