@midscene/core 1.8.11 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304) hide show
  1. package/dist/es/agent/agent.mjs +40 -50
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +39 -19
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +24 -22
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +11 -14
  8. package/dist/es/agent/utils.mjs.map +1 -1
  9. package/dist/es/ai-model/connectivity.mjs +7 -3
  10. package/dist/es/ai-model/connectivity.mjs.map +1 -1
  11. package/dist/es/ai-model/errors.mjs +9 -0
  12. package/dist/es/ai-model/errors.mjs.map +1 -0
  13. package/dist/es/ai-model/index.mjs +3 -4
  14. package/dist/es/ai-model/inspect.mjs +132 -144
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +46 -28
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/{auto-glm → models/auto-glm}/actions.mjs +22 -44
  19. package/dist/es/ai-model/models/auto-glm/actions.mjs.map +1 -0
  20. package/dist/es/ai-model/models/auto-glm/adapter.mjs +45 -0
  21. package/dist/es/ai-model/models/auto-glm/adapter.mjs.map +1 -0
  22. package/dist/es/ai-model/models/auto-glm/locate.mjs +112 -0
  23. package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -0
  24. package/dist/es/ai-model/models/auto-glm/parser.mjs.map +1 -0
  25. package/dist/es/ai-model/{auto-glm → models/auto-glm}/planning.mjs +6 -7
  26. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -0
  27. package/dist/es/ai-model/{auto-glm → models/auto-glm}/prompt.mjs +3 -11
  28. package/dist/es/ai-model/models/auto-glm/prompt.mjs.map +1 -0
  29. package/dist/es/ai-model/models/default.mjs +12 -0
  30. package/dist/es/ai-model/models/default.mjs.map +1 -0
  31. package/dist/es/ai-model/models/doubao.mjs +138 -0
  32. package/dist/es/ai-model/models/doubao.mjs.map +1 -0
  33. package/dist/es/ai-model/models/gemini.mjs +34 -0
  34. package/dist/es/ai-model/models/gemini.mjs.map +1 -0
  35. package/dist/es/ai-model/models/glm.mjs +37 -0
  36. package/dist/es/ai-model/models/glm.mjs.map +1 -0
  37. package/dist/es/ai-model/models/gpt.mjs +31 -0
  38. package/dist/es/ai-model/models/gpt.mjs.map +1 -0
  39. package/dist/es/ai-model/models/index.mjs +2 -0
  40. package/dist/es/ai-model/models/qwen.mjs +113 -0
  41. package/dist/es/ai-model/models/qwen.mjs.map +1 -0
  42. package/dist/es/ai-model/models/registry.mjs +45 -0
  43. package/dist/es/ai-model/models/registry.mjs.map +1 -0
  44. package/dist/es/ai-model/models/resolved.mjs +104 -0
  45. package/dist/es/ai-model/models/resolved.mjs.map +1 -0
  46. package/dist/es/ai-model/models/types.mjs +0 -0
  47. package/dist/es/ai-model/models/ui-tars/adapter.mjs +142 -0
  48. package/dist/es/ai-model/models/ui-tars/adapter.mjs.map +1 -0
  49. package/dist/es/ai-model/{ui-tars-planning.mjs → models/ui-tars/planning.mjs} +44 -62
  50. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/extraction.mjs +3 -3
  52. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  53. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -11
  54. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  55. package/dist/es/ai-model/prompt/llm-planning.mjs +25 -60
  56. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  57. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -10
  58. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  59. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs +9 -0
  60. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs.map +1 -0
  61. package/dist/es/ai-model/prompt/locate-param-example.mjs +15 -0
  62. package/dist/es/ai-model/prompt/locate-param-example.mjs.map +1 -0
  63. package/dist/es/ai-model/prompt/playwright-generator.mjs +5 -5
  64. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  65. package/dist/es/ai-model/prompt/yaml-generator.mjs +5 -5
  66. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  67. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs +107 -0
  68. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs.map +1 -0
  69. package/dist/es/ai-model/service-caller/index.mjs +59 -190
  70. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  71. package/dist/es/ai-model/service-caller/json.mjs +60 -0
  72. package/dist/es/ai-model/service-caller/json.mjs.map +1 -0
  73. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs +68 -0
  74. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs.map +1 -0
  75. package/dist/es/ai-model/shared/model-locate-result/factory.mjs +96 -0
  76. package/dist/es/ai-model/shared/model-locate-result/factory.mjs.map +1 -0
  77. package/dist/es/ai-model/shared/model-locate-result/index.mjs +3 -0
  78. package/dist/es/ai-model/shared/model-locate-result/parse.mjs +41 -0
  79. package/dist/es/ai-model/shared/model-locate-result/parse.mjs.map +1 -0
  80. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs +64 -0
  81. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs.map +1 -0
  82. package/dist/es/ai-model/shared/model-locate-result/types.mjs +0 -0
  83. package/dist/es/ai-model/types.mjs +0 -0
  84. package/dist/es/ai-model/workflows/image-preprocess.mjs +27 -0
  85. package/dist/es/ai-model/workflows/image-preprocess.mjs.map +1 -0
  86. package/dist/es/ai-model/workflows/inspect/index.mjs +2 -0
  87. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs +23 -0
  88. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs.map +1 -0
  89. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs +18 -0
  90. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs.map +1 -0
  91. package/dist/es/ai-model/workflows/inspect/types.mjs +0 -0
  92. package/dist/es/ai-model/workflows/planning/index.mjs +5 -0
  93. package/dist/es/ai-model/workflows/planning/index.mjs.map +1 -0
  94. package/dist/es/ai-model/workflows/planning/types.mjs +0 -0
  95. package/dist/es/common.mjs +2 -174
  96. package/dist/es/common.mjs.map +1 -1
  97. package/dist/es/device/index.mjs.map +1 -1
  98. package/dist/es/service/index.mjs +96 -69
  99. package/dist/es/service/index.mjs.map +1 -1
  100. package/dist/es/types.mjs.map +1 -1
  101. package/dist/es/utils.mjs +2 -2
  102. package/dist/es/yaml/player.mjs +4 -3
  103. package/dist/es/yaml/player.mjs.map +1 -1
  104. package/dist/lib/agent/agent.js +43 -53
  105. package/dist/lib/agent/agent.js.map +1 -1
  106. package/dist/lib/agent/task-builder.js +38 -18
  107. package/dist/lib/agent/task-builder.js.map +1 -1
  108. package/dist/lib/agent/tasks.js +23 -21
  109. package/dist/lib/agent/tasks.js.map +1 -1
  110. package/dist/lib/agent/utils.js +17 -17
  111. package/dist/lib/agent/utils.js.map +1 -1
  112. package/dist/lib/ai-model/connectivity.js +7 -3
  113. package/dist/lib/ai-model/connectivity.js.map +1 -1
  114. package/dist/lib/ai-model/errors.js +46 -0
  115. package/dist/lib/ai-model/errors.js.map +1 -0
  116. package/dist/lib/ai-model/index.js +7 -14
  117. package/dist/lib/ai-model/inspect.js +141 -144
  118. package/dist/lib/ai-model/inspect.js.map +1 -1
  119. package/dist/lib/ai-model/llm-planning.js +44 -26
  120. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  121. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/actions.js +22 -44
  122. package/dist/lib/ai-model/models/auto-glm/actions.js.map +1 -0
  123. package/dist/lib/ai-model/models/auto-glm/adapter.js +79 -0
  124. package/dist/lib/ai-model/models/auto-glm/adapter.js.map +1 -0
  125. package/dist/lib/ai-model/models/auto-glm/locate.js +146 -0
  126. package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -0
  127. package/dist/lib/ai-model/models/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/planning.js +8 -9
  129. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/prompt.js +14 -16
  131. package/dist/lib/ai-model/models/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/{auto-glm/util.js → models/default.js} +13 -13
  133. package/dist/lib/ai-model/models/default.js.map +1 -0
  134. package/dist/lib/ai-model/models/doubao.js +184 -0
  135. package/dist/lib/ai-model/models/doubao.js.map +1 -0
  136. package/dist/lib/ai-model/models/gemini.js +68 -0
  137. package/dist/lib/ai-model/models/gemini.js.map +1 -0
  138. package/dist/lib/ai-model/models/glm.js +71 -0
  139. package/dist/lib/ai-model/models/glm.js.map +1 -0
  140. package/dist/lib/ai-model/models/gpt.js +65 -0
  141. package/dist/lib/ai-model/models/gpt.js.map +1 -0
  142. package/dist/lib/ai-model/{service-caller/image-detail.js → models/index.js} +8 -7
  143. package/dist/lib/ai-model/models/index.js.map +1 -0
  144. package/dist/lib/ai-model/models/qwen.js +147 -0
  145. package/dist/lib/ai-model/models/qwen.js.map +1 -0
  146. package/dist/lib/ai-model/models/registry.js +85 -0
  147. package/dist/lib/ai-model/models/registry.js.map +1 -0
  148. package/dist/lib/ai-model/models/resolved.js +138 -0
  149. package/dist/lib/ai-model/models/resolved.js.map +1 -0
  150. package/dist/lib/ai-model/models/types.js +20 -0
  151. package/dist/lib/ai-model/models/types.js.map +1 -0
  152. package/dist/lib/ai-model/models/ui-tars/adapter.js +176 -0
  153. package/dist/lib/ai-model/models/ui-tars/adapter.js.map +1 -0
  154. package/dist/lib/ai-model/{ui-tars-planning.js → models/ui-tars/planning.js} +44 -62
  155. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/extraction.js +3 -3
  157. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  158. package/dist/lib/ai-model/prompt/llm-locator.js +11 -11
  159. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  160. package/dist/lib/ai-model/prompt/llm-planning.js +25 -60
  161. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  162. package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -10
  163. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  164. package/dist/lib/ai-model/prompt/locate-grounding-rules.js +43 -0
  165. package/dist/lib/ai-model/prompt/locate-grounding-rules.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/locate-param-example.js +52 -0
  167. package/dist/lib/ai-model/prompt/locate-param-example.js.map +1 -0
  168. package/dist/lib/ai-model/prompt/playwright-generator.js +5 -5
  169. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  170. package/dist/lib/ai-model/prompt/yaml-generator.js +5 -5
  171. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  172. package/dist/lib/ai-model/prompts/locate-result-coordinates.js +150 -0
  173. package/dist/lib/ai-model/prompts/locate-result-coordinates.js.map +1 -0
  174. package/dist/lib/ai-model/service-caller/index.js +68 -199
  175. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  176. package/dist/lib/ai-model/service-caller/json.js +100 -0
  177. package/dist/lib/ai-model/service-caller/json.js.map +1 -0
  178. package/dist/lib/ai-model/shared/model-locate-result/bbox.js +117 -0
  179. package/dist/lib/ai-model/shared/model-locate-result/bbox.js.map +1 -0
  180. package/dist/lib/ai-model/shared/model-locate-result/factory.js +130 -0
  181. package/dist/lib/ai-model/shared/model-locate-result/factory.js.map +1 -0
  182. package/dist/lib/ai-model/{prompt/common.js → shared/model-locate-result/index.js} +9 -9
  183. package/dist/lib/ai-model/shared/model-locate-result/index.js.map +1 -0
  184. package/dist/lib/ai-model/shared/model-locate-result/parse.js +78 -0
  185. package/dist/lib/ai-model/shared/model-locate-result/parse.js.map +1 -0
  186. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js +98 -0
  187. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js.map +1 -0
  188. package/dist/lib/ai-model/shared/model-locate-result/types.js +20 -0
  189. package/dist/lib/ai-model/shared/model-locate-result/types.js.map +1 -0
  190. package/dist/lib/ai-model/types.js +20 -0
  191. package/dist/lib/ai-model/types.js.map +1 -0
  192. package/dist/lib/ai-model/workflows/image-preprocess.js +61 -0
  193. package/dist/lib/ai-model/workflows/image-preprocess.js.map +1 -0
  194. package/dist/lib/ai-model/workflows/inspect/index.js +50 -0
  195. package/dist/lib/ai-model/workflows/inspect/index.js.map +1 -0
  196. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js +60 -0
  197. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js.map +1 -0
  198. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js +52 -0
  199. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js.map +1 -0
  200. package/dist/lib/ai-model/workflows/inspect/types.js +20 -0
  201. package/dist/lib/ai-model/workflows/inspect/types.js.map +1 -0
  202. package/dist/lib/ai-model/{model-family.js → workflows/planning/index.js} +6 -7
  203. package/dist/lib/ai-model/workflows/planning/index.js.map +1 -0
  204. package/dist/lib/ai-model/workflows/planning/types.js +20 -0
  205. package/dist/lib/ai-model/workflows/planning/types.js.map +1 -0
  206. package/dist/lib/common.js +4 -206
  207. package/dist/lib/common.js.map +1 -1
  208. package/dist/lib/device/index.js.map +1 -1
  209. package/dist/lib/service/index.js +96 -69
  210. package/dist/lib/service/index.js.map +1 -1
  211. package/dist/lib/types.js.map +1 -1
  212. package/dist/lib/utils.js +2 -2
  213. package/dist/lib/yaml/player.js +4 -3
  214. package/dist/lib/yaml/player.js.map +1 -1
  215. package/dist/types/agent/agent.d.ts +14 -6
  216. package/dist/types/agent/task-builder.d.ts +2 -2
  217. package/dist/types/agent/tasks.d.ts +6 -6
  218. package/dist/types/agent/utils.d.ts +8 -5
  219. package/dist/types/ai-model/errors.d.ts +2 -0
  220. package/dist/types/ai-model/index.d.ts +2 -4
  221. package/dist/types/ai-model/inspect.d.ts +13 -33
  222. package/dist/types/ai-model/llm-planning.d.ts +6 -17
  223. package/dist/types/ai-model/{auto-glm → models/auto-glm}/actions.d.ts +2 -2
  224. package/dist/types/ai-model/models/auto-glm/adapter.d.ts +5 -0
  225. package/dist/types/ai-model/models/auto-glm/locate.d.ts +3 -0
  226. package/dist/types/ai-model/models/auto-glm/planning.d.ts +3 -0
  227. package/dist/types/ai-model/models/auto-glm/prompt.d.ts +4 -0
  228. package/dist/types/ai-model/models/default.d.ts +2 -0
  229. package/dist/types/ai-model/models/doubao.d.ts +10 -0
  230. package/dist/types/ai-model/models/gemini.d.ts +18 -0
  231. package/dist/types/ai-model/models/glm.d.ts +18 -0
  232. package/dist/types/ai-model/models/gpt.d.ts +18 -0
  233. package/dist/types/ai-model/models/index.d.ts +2 -0
  234. package/dist/types/ai-model/models/qwen.d.ts +30 -0
  235. package/dist/types/ai-model/models/registry.d.ts +81 -0
  236. package/dist/types/ai-model/models/resolved.d.ts +9 -0
  237. package/dist/types/ai-model/models/types.d.ts +102 -0
  238. package/dist/types/ai-model/models/ui-tars/adapter.d.ts +6 -0
  239. package/dist/types/ai-model/{ui-tars-planning.d.ts → models/ui-tars/planning.d.ts} +7 -11
  240. package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
  241. package/dist/types/ai-model/prompt/llm-planning.d.ts +5 -5
  242. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
  243. package/dist/types/ai-model/prompt/locate-grounding-rules.d.ts +1 -0
  244. package/dist/types/ai-model/prompt/locate-param-example.d.ts +3 -0
  245. package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -3
  246. package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -3
  247. package/dist/types/ai-model/prompts/locate-result-coordinates.d.ts +6 -0
  248. package/dist/types/ai-model/service-caller/index.d.ts +19 -27
  249. package/dist/types/ai-model/service-caller/json.d.ts +9 -0
  250. package/dist/types/ai-model/shared/model-locate-result/bbox.d.ts +7 -0
  251. package/dist/types/ai-model/shared/model-locate-result/factory.d.ts +2 -0
  252. package/dist/types/ai-model/shared/model-locate-result/index.d.ts +3 -0
  253. package/dist/types/ai-model/shared/model-locate-result/parse.d.ts +5 -0
  254. package/dist/types/ai-model/shared/model-locate-result/pixel-bbox-mapper.d.ts +7 -0
  255. package/dist/types/ai-model/shared/model-locate-result/types.d.ts +157 -0
  256. package/dist/types/ai-model/types.d.ts +2 -0
  257. package/dist/types/ai-model/workflows/image-preprocess.d.ts +30 -0
  258. package/dist/types/ai-model/workflows/inspect/index.d.ts +1 -0
  259. package/dist/types/ai-model/workflows/inspect/locate-result-rect.d.ts +4 -0
  260. package/dist/types/ai-model/workflows/inspect/search-area-mapping.d.ts +3 -0
  261. package/dist/types/ai-model/workflows/inspect/types.d.ts +37 -0
  262. package/dist/types/ai-model/workflows/planning/index.d.ts +2 -0
  263. package/dist/types/ai-model/workflows/planning/types.d.ts +15 -0
  264. package/dist/types/common.d.ts +0 -30
  265. package/dist/types/device/index.d.ts +22 -22
  266. package/dist/types/service/index.d.ts +5 -4
  267. package/dist/types/types.d.ts +21 -9
  268. package/dist/types/yaml.d.ts +8 -2
  269. package/package.json +2 -2
  270. package/dist/es/ai-model/auto-glm/actions.mjs.map +0 -1
  271. package/dist/es/ai-model/auto-glm/index.mjs +0 -6
  272. package/dist/es/ai-model/auto-glm/parser.mjs.map +0 -1
  273. package/dist/es/ai-model/auto-glm/planning.mjs.map +0 -1
  274. package/dist/es/ai-model/auto-glm/prompt.mjs.map +0 -1
  275. package/dist/es/ai-model/auto-glm/util.mjs +0 -9
  276. package/dist/es/ai-model/auto-glm/util.mjs.map +0 -1
  277. package/dist/es/ai-model/model-family.mjs +0 -6
  278. package/dist/es/ai-model/model-family.mjs.map +0 -1
  279. package/dist/es/ai-model/prompt/common.mjs +0 -8
  280. package/dist/es/ai-model/prompt/common.mjs.map +0 -1
  281. package/dist/es/ai-model/service-caller/image-detail.mjs +0 -6
  282. package/dist/es/ai-model/service-caller/image-detail.mjs.map +0 -1
  283. package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
  284. package/dist/lib/ai-model/auto-glm/actions.js.map +0 -1
  285. package/dist/lib/ai-model/auto-glm/index.js +0 -66
  286. package/dist/lib/ai-model/auto-glm/index.js.map +0 -1
  287. package/dist/lib/ai-model/auto-glm/parser.js.map +0 -1
  288. package/dist/lib/ai-model/auto-glm/planning.js.map +0 -1
  289. package/dist/lib/ai-model/auto-glm/prompt.js.map +0 -1
  290. package/dist/lib/ai-model/auto-glm/util.js.map +0 -1
  291. package/dist/lib/ai-model/model-family.js.map +0 -1
  292. package/dist/lib/ai-model/prompt/common.js.map +0 -1
  293. package/dist/lib/ai-model/service-caller/image-detail.js.map +0 -1
  294. package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
  295. package/dist/types/ai-model/auto-glm/index.d.ts +0 -6
  296. package/dist/types/ai-model/auto-glm/planning.d.ts +0 -12
  297. package/dist/types/ai-model/auto-glm/prompt.d.ts +0 -27
  298. package/dist/types/ai-model/auto-glm/util.d.ts +0 -13
  299. package/dist/types/ai-model/model-family.d.ts +0 -7
  300. package/dist/types/ai-model/prompt/common.d.ts +0 -2
  301. package/dist/types/ai-model/service-caller/image-detail.d.ts +0 -2
  302. /package/dist/es/ai-model/{auto-glm → models/auto-glm}/parser.mjs +0 -0
  303. /package/dist/lib/ai-model/{auto-glm → models/auto-glm}/parser.js +0 -0
  304. /package/dist/types/ai-model/{auto-glm → models/auto-glm}/parser.d.ts +0 -0
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n generateElementByPoint,\n generateElementByRect,\n} from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n modelFamily: IModelConfig['modelFamily'];\n}): Promise<{ rect: Rect; imageBase64: string; scale: number }> {\n const { context, baseRect, modelFamily } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n sectionRect.width = scaledResult.width;\n sectionRect.height = scaledResult.height;\n return {\n rect: sectionRect,\n imageBase64: scaledResult.imageBase64,\n scale: scaleRatio,\n };\n}\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images. These reference images are supporting context only, not the current screenshot being evaluated, unless the task explicitly asks for comparison or matching.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}'. It is a reference image, not the current screenshot:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n abortSignal?: AbortSignal;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.shotSize.width;\n let imageHeight = context.shotSize.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig, {\n abortSignal: options.abortSignal,\n });\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Apply offset if searching in a cropped area\n let finalX = pixelX;\n let finalY = pixelY;\n if (options.searchConfig?.rect) {\n finalX += options.searchConfig.rect.left;\n finalY += options.searchConfig.rect.top;\n }\n\n const element: LocateResultElement = generateElementByPoint(\n [finalX, finalY],\n targetElementDescriptionText as string,\n );\n\n resRect = element.rect;\n debugInspect('auto-glm resRect:', resRect);\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n let res: Awaited<\n ReturnType<\n typeof callAIWithObjectResponse<AIElementResponse | [number, number]>\n >\n >;\n try {\n res = await callAIWithObjectResponse<AIElementResponse | [number, number]>(\n msgs,\n modelConfig,\n {\n abortSignal: options.abortSignal,\n },\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n elements: [],\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n options.searchConfig?.scale,\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n abortSignal?: AbortSignal;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n scale?: number;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n {\n abortSignal: options.abortSignal,\n },\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n imageBase64: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n modelFamily,\n });\n\n debugSection(\n 'scaled sectionRect from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.rect.width,\n searchAreaConfig.rect.height,\n searchAreaConfig.scale,\n );\n }\n\n return {\n rect: searchAreaConfig?.rect,\n imageBase64: searchAreaConfig?.imageBase64,\n scale: searchAreaConfig?.scale,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract({\n screenshotIncluded: extractOption?.screenshotIncluded !== false,\n referenceImagesIncluded: !!multimodalPrompt?.images?.length,\n });\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'text',\n text: 'This is the current screenshot to evaluate. Unless <DATA_DEMAND> explicitly asks for comparison or matching against reference images, base your answer on this screenshot and its contents when provided.',\n });\n\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect('AiJudgeOrderSensitive: description=%s', description);\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","buildSearchAreaConfig","options","context","baseRect","modelFamily","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","targetElementDescription","modelConfig","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","finalX","finalY","element","generateElementByPoint","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","undefined","JSON","Array","adaptBboxToRect","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandedRect","originalWidth","originalHeight","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","callAIFn","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AA6DA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAEvB,eAAeE,sBAAsBC,OAI3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGH;IAC3C,MAAMI,aAAa;IACnB,MAAMC,cAAcC,iBAAiBJ,UAAUD,QAAQ,QAAQ;IAE/D,MAAMM,gBAAgB,MAAMC,WAC1BP,QAAQ,UAAU,CAAC,MAAM,EACzBI,aACAF,AAAgB,iBAAhBA;IAGF,MAAMM,eAAe,MAAMC,WAAWH,cAAc,WAAW,EAAEH;IACjEC,YAAY,KAAK,GAAGI,aAAa,KAAK;IACtCJ,YAAY,MAAM,GAAGI,aAAa,MAAM;IACxC,OAAO;QACL,MAAMJ;QACN,aAAaI,aAAa,WAAW;QACrC,OAAOL;IACT;AACF;AAEA,MAAMO,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,uDAAuD,CAAC;oBAChH;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBnB,OAMrC;IAUC,MAAM,EAAEC,OAAO,EAAEmB,wBAAwB,EAAEC,WAAW,EAAE,GAAGrB;IAC3D,MAAM,EAAEG,WAAW,EAAE,GAAGkB;IACxB,MAAMC,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElDsB,OACEH,0BACA;IAEF,MAAMI,+BAA+Bb,wBACnCS;IAEF,MAAMK,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,UAAUzB,eAC3B0B,uBAAuB1B,eACvB2B,4BAA4B3B;IAEhC,IAAI4B,eAAeT;IACnB,IAAIU,aAAa/B,QAAQ,QAAQ,CAAC,KAAK;IACvC,IAAIgC,cAAchC,QAAQ,QAAQ,CAAC,MAAM;IACzC,IAAIiC,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIjC,QAAQ,YAAY,EAAE;QACxBuB,OACEvB,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFuB,OACEvB,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGF+B,eAAe/B,QAAQ,YAAY,CAAC,WAAW;QAC/CgC,aAAahC,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCiC,cAAcjC,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCkC,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAI9B,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMiC,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMrB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,UAAUzB,eACZ,CAAC,KAAK,EAAEsB,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOL,0BAAuC;QAChD,MAAMkB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQO,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAL,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIV,UAAUzB,cAAc;QAC1B,MAAM,EAAE,SAASoC,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,yBAAyB1B,MAAMM,aAAa;YAChD,aAAarB,QAAQ,WAAW;QAClC;QAEFJ,aAAa,yBAAyB2C;QAEtC,MAAMG,SAASC,2BAA2BJ;QAE1C3C,aAAa,sBAAsB8C,OAAO,KAAK;QAC/C9C,aAAa,yBAAyB8C,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9D9C,aAAa,yBAAyBkD,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnC9C,aAAa,iCAAiC;gBAAEmD;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9CrC,aAAa,+BAA+B;gBAAEqD;gBAAQE;YAAO;YAG7D,IAAIC,SAASH;YACb,IAAII,SAASF;YACb,IAAInD,QAAQ,YAAY,EAAE,MAAM;gBAC9BoD,UAAUpD,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBACxCqD,UAAUrD,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YACzC;YAEA,MAAMsD,UAA+BC,uBACnC;gBAACH;gBAAQC;aAAO,EAChB7B;YAGFoB,UAAUU,QAAQ,IAAI;YACtB1D,aAAa,qBAAqBgD;YAElC,IAAIU,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMV;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,IAAIc;IAKJ,IAAI;QACFA,MAAM,MAAMC,yBACV1C,MACAM,aACA;YACE,aAAarB,QAAQ,WAAW;QAClC;IAEJ,EAAE,OAAO0D,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,UAAU,EAAE;gBACZ,QAAQ;oBAAC,CAAC,eAAe,EAAEL,cAAc;iBAAC;YAC5C;YACAG;YACAtB;YACA,mBAAmBwB;QACrB;IACF;IAEA,MAAMF,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIZ;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYU,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBU,MAAM,OAAO,CAACV,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAZ,UAAUuB,gBACRX,IAAI,OAAO,CAAC,IAAI,EAChBxB,YACAC,aACAjC,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BkC,oBACAC,qBACAhC,aACAH,QAAQ,YAAY,EAAE;YAGxBJ,aAAa,WAAWgD;YAExB,MAAMU,UAA+Bc,sBACnCxB,SACApB;YAEFsB,SAAS,EAAE;YAEX,IAAIQ,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;IACF,EAAE,OAAOe,GAAG;QACV,MAAMC,MACJD,aAAaT,QACT,CAAC,sBAAsB,EAAES,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACvB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEwB,IAAI,CAAC,CAAC;aAFtBxB,SAAS;YAACwB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAM1B;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAgB;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBvE,OAKrC;IAQC,MAAM,EAAEC,OAAO,EAAEuE,kBAAkB,EAAEnD,WAAW,EAAE,GAAGrB;IACrD,MAAM,EAAEG,WAAW,EAAE,GAAGkB;IACxB,MAAMC,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM0B,eAAe8C,4BAA4BtE;IACjD,MAAMuE,gCAAgCC,0BACpChE,wBAAwB6D;IAE1B,MAAMzD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMlC,SAAS,MAAMzB,mBAAmB;YACtC,QAAQ2D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAzD,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIsC;IAGJ,IAAI;QACFA,SAAS,MAAMnB,yBACb1C,MACAM,aACA;YACE,aAAarB,QAAQ,WAAW;QAClC;IAEJ,EAAE,OAAO0D,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAaA;YACb,OAAO,CAAC,eAAe,EAAEL,cAAc;YACvCG;YACAtB;QACF;IACF;IAEA,IAAIqC;IAGJ,MAAMC,cAAcF,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIE,aAAa;QACf,MAAMC,aAAaZ,gBACjBW,aACA7E,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBE;QAEFL,aAAa,0BAA0BiF;QAEvC,MAAMC,oBAAoBJ,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D9E,aAAa,wBAAwBkF;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAShB,MAAM,OAAO,CAACgB,OAC/B,GAAG,CAAC,CAACA,OACGf,gBACLe,MACAjF,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBE;QAGNL,aAAa,qBAAqBmF;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DnF,aAAa,iBAAiBqF;QAE9B,MAAME,eAAe/E,iBAAiB6E,YAAYlF,QAAQ,QAAQ;QAClE,MAAMqF,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1CvF,aAAa,2BAA2BuF;QAExCR,mBAAmB,MAAM9E,sBAAsB;YAC7CE;YACA,UAAUkF;YACVhF;QACF;QAEAL,aACE,qDACAwF,eACAC,gBACAV,iBAAiB,IAAI,CAAC,KAAK,EAC3BA,iBAAiB,IAAI,CAAC,MAAM,EAC5BA,iBAAiB,KAAK;IAE1B;IAEA,OAAO;QACL,MAAMA,kBAAkB;QACxB,aAAaA,kBAAkB;QAC/B,OAAOA,kBAAkB;QACzB,OAAOD,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeY,qBAAwBxF,OAO7C;IACC,MAAM,EAAEyF,SAAS,EAAExF,OAAO,EAAEyF,aAAa,EAAE5E,gBAAgB,EAAEO,WAAW,EAAE,GACxErB;IACF,MAAM2B,eAAegE,sBAAsB;QACzC,oBAAoBD,eAAe,uBAAuB;QAC1D,yBAAyB,CAAC,CAAC5E,kBAAkB,QAAQ;IACvD;IACA,MAAMQ,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM2F,wBAAwBC,uBAC5B7F,QAAQ,eAAe,IAAI,IAC3ByF;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OAAO;QAC/CI,YAAY,IAAI,CAAC;YACf,MAAM;YACN,MAAM;QACR;QAEAA,YAAY,IAAI,CAAC;YACf,MAAM;YACN,WAAW;gBACT,KAAKxE;gBACL,QAAQ;YACV;QACF;IACF;IAEAwE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM7E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAASmE;QACX;KACD;IAED,IAAIhF,kBAAkB;QACpB,MAAMwB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM,EACJ,SAASwB,WAAW,EACpBtB,KAAK,EACLuD,iBAAiB,EAClB,GAAG,MAAMC,OAAOjF,MAAMM;IAGvB,IAAI4E;IACJ,IAAI;QACFA,cAAcC,2BAA8BpC;IAC9C,EAAE,OAAOqC,YAAY;QAEnB,MAAMxC,eACJwC,sBAAsBvC,QAAQuC,WAAW,OAAO,GAAGtC,OAAOsC;QAC5D,MAAM,IAAIpC,qBACR,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAtB;IAEJ;IAEA,OAAO;QACLyD;QACAnC;QACAtB;QACAuD;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBC,QAAwE,EACxEjF,WAAyB;IAKzB,MAAMM,eAAe4E;IACrB,MAAMC,aAAaC,0BAA0BJ;IAE7C,MAAMtF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS6E;QACX;KACD;IAED5G,aAAa,yCAAyCyG;IAEtD,MAAMzB,SAAS,MAAM0B,SAASvF,MAAMM;IAEpC,OAAO;QACL,kBAAkBuD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementLocateResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport {\n cropByRect,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { expandSearchArea } from '../common';\nimport type { ModelRuntime } from './models';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n} from './service-caller/index';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport {\n mergePixelBboxesToRect,\n pixelBboxToRect,\n} from './workflows/inspect/locate-result-rect';\nimport { mapSearchAreaPixelBboxToOriginalPixelBbox } from './workflows/inspect/search-area-mapping';\nimport type {\n LocateOptions,\n LocateResult,\n SearchAreaConfig,\n} from './workflows/inspect/types';\n\nexport type InspectAIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nfunction hasLocateResult(input: unknown, resultKey: string) {\n if (!input || typeof input !== 'object') {\n return false;\n }\n\n const record = input as Record<string, unknown>;\n const locateResult = record[resultKey];\n return Array.isArray(locateResult)\n ? locateResult.length > 0\n : locateResult !== undefined;\n}\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n}): Promise<SearchAreaConfig> {\n const { context, baseRect } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n return {\n sourceRect: sectionRect,\n image: {\n imageBase64: scaledResult.imageBase64,\n width: scaledResult.width,\n height: scaledResult.height,\n },\n mapping: {\n offset: {\n x: sectionRect.left,\n y: sectionRect.top,\n },\n scale: scaleRatio,\n },\n };\n}\n\nexport const extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n }\n return prompt.prompt;\n};\n\nexport const promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images. These reference images are supporting context only, not the current screenshot being evaluated, unless the task explicitly asks for comparison or matching.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}'. It is a reference image, not the current screenshot:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(\n options: LocateOptions & { targetElementDescription: TUserPrompt },\n): Promise<LocateResult> {\n const { targetElementDescription, ...locateOptions } = options;\n const locateAdapter = options.modelRuntime.adapter.locate;\n if (locateAdapter.kind === 'custom') {\n return locateAdapter.locateFn(targetElementDescription, locateOptions);\n }\n return genericLocate(targetElementDescription, locateOptions);\n}\n\nexport async function genericLocate(\n elementDescription: TUserPrompt,\n options: LocateOptions,\n): Promise<LocateResult> {\n const { context } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'generic locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n\n assert(elementDescription, 'cannot find the target element description');\n const elementDescriptionText = extraTextFromUserPrompt(elementDescription);\n const userInstructionPrompt = findElementPrompt(elementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(\n adapter.locate.resultAdapter.promptSpec,\n );\n\n const modelImage = options.searchConfig?.image ?? {\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n };\n const preparedImage = await prepareModelImage({\n imageBase64: modelImage.imageBase64,\n width: modelImage.width,\n height: modelImage.height,\n policy: adapter.imagePreprocess,\n });\n\n const imagePayload = preparedImage.imageBase64;\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof elementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: elementDescription.images,\n convertHttpImage2Base64: elementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let res: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AIElementLocateResponse>>\n >;\n try {\n res = await callAIWithObjectResponse<AIElementLocateResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'locate',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElement: LocateResultElement | undefined;\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(res.content, resultAdapter.promptSpec.resultKey)) {\n return {\n rect: undefined,\n parseResult: {\n element: undefined,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n }\n\n try {\n const mapping = options.searchConfig?.mapping;\n const targetPixelBbox = resultAdapter.adaptElementLocateResultToPixelBbox(\n res.content,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n );\n resRect = pixelBboxToRect(\n mapSearchAreaPixelBboxToOriginalPixelBbox(targetPixelBbox, mapping),\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n elementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElement = element;\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse locate result: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n element: matchedElement,\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelRuntime: ModelRuntime;\n abortSignal?: AbortSignal;\n}): Promise<{\n searchAreaConfig?: SearchAreaConfig;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const modelRuntime = options.modelRuntime;\n const { adapter } = modelRuntime;\n assert(\n adapter.locate.kind === 'standard',\n 'section locate requires a standard locate adapter',\n );\n const screenshotBase64 = context.screenshot.base64;\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: context.shotSize.width,\n height: context.shotSize.height,\n policy: adapter.imagePreprocess,\n });\n\n const systemPrompt = systemPromptToLocateSection(\n adapter.locate.resultAdapter.promptSpec,\n );\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: preparedImage.imageBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelRuntime,\n {\n abortSignal: options.abortSignal,\n jsonParserSource: 'section-locator',\n },\n );\n } catch (callError) {\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n searchAreaConfig: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n let sectionError = result.content.error;\n const resultAdapter = adapter.locate.resultAdapter;\n if (!hasLocateResult(result.content, resultAdapter.promptSpec.resultKey)) {\n return {\n searchAreaConfig: undefined,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n }\n\n try {\n const adaptedResult =\n resultAdapter.adaptSectionLocateResultToPixelBboxGroup(result.content, {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n });\n const mergedRect = mergePixelBboxesToRect([\n adaptedResult.target,\n ...(adaptedResult.references ?? []),\n ]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n });\n\n debugSection(\n 'scaled section image from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.image.width,\n searchAreaConfig.image.height,\n searchAreaConfig.mapping.scale,\n );\n } catch (error) {\n const parseErrorMessage =\n error instanceof Error\n ? `Failed to parse section locate result: ${error.message}`\n : 'unknown error in section locate';\n sectionError = sectionError\n ? `${sectionError} (${parseErrorMessage})`\n : parseErrorMessage;\n }\n\n return {\n searchAreaConfig,\n error: sectionError,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelRuntime: ModelRuntime;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelRuntime } =\n options;\n const systemPrompt = systemPromptToExtract({\n screenshotIncluded: extractOption?.screenshotIncluded !== false,\n referenceImagesIncluded: !!multimodalPrompt?.images?.length,\n });\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'text',\n text: 'This is the current screenshot to evaluate. Unless <DATA_DEMAND> explicitly asks for comparison or matching against reference images, base your answer on this screenshot and its contents when provided.',\n });\n\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime);\n\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n modelRuntime: ModelRuntime,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: InspectAIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect('AiJudgeOrderSensitive: description=%s', description);\n\n const result = await callAIWithObjectResponse<{ isOrderSensitive: boolean }>(\n msgs,\n modelRuntime,\n {\n jsonParserSource: 'generic-object',\n },\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","hasLocateResult","input","resultKey","record","locateResult","Array","undefined","buildSearchAreaConfig","options","context","baseRect","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","targetElementDescription","locateOptions","locateAdapter","genericLocate","elementDescription","modelRuntime","adapter","assert","screenshotBase64","elementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","modelImage","preparedImage","prepareModelImage","imagePayload","addOns","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","usage","JSON","resRect","matchedElement","errors","resultAdapter","mapping","targetPixelBbox","pixelBboxToRect","mapSearchAreaPixelBboxToOriginalPixelBbox","element","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionError","adaptedResult","mergedRect","mergePixelBboxesToRect","expandedRect","originalWidth","originalHeight","error","parseErrorMessage","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AAgEA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,SAASE,gBAAgBC,KAAc,EAAEC,SAAiB;IACxD,IAAI,CAACD,SAAS,AAAiB,YAAjB,OAAOA,OACnB,OAAO;IAGT,MAAME,SAASF;IACf,MAAMG,eAAeD,MAAM,CAACD,UAAU;IACtC,OAAOG,MAAM,OAAO,CAACD,gBACjBA,aAAa,MAAM,GAAG,IACtBA,AAAiBE,WAAjBF;AACN;AAEO,eAAeG,sBAAsBC,OAG3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAE,GAAGF;IAC9B,MAAMG,aAAa;IACnB,MAAMC,cAAcC,iBAAiBH,UAAUD,QAAQ,QAAQ;IAE/D,MAAMK,gBAAgB,MAAMC,WAC1BN,QAAQ,UAAU,CAAC,MAAM,EACzBG;IAGF,MAAMI,eAAe,MAAMC,WAAWH,cAAc,WAAW,EAAEH;IACjE,OAAO;QACL,YAAYC;QACZ,OAAO;YACL,aAAaI,aAAa,WAAW;YACrC,OAAOA,aAAa,KAAK;YACzB,QAAQA,aAAa,MAAM;QAC7B;QACA,SAAS;YACP,QAAQ;gBACN,GAAGJ,YAAY,IAAI;gBACnB,GAAGA,YAAY,GAAG;YACpB;YACA,OAAOD;QACT;IACF;AACF;AAEO,MAAMO,0BAA0B,CAACC;IACtC,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAET,OAAOA,OAAO,MAAM;AACtB;AAEO,MAAMC,qBAAqB,OAChCC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,uDAAuD,CAAC;oBAChH;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBACpBlB,OAAkE;IAElE,MAAM,EAAEmB,wBAAwB,EAAE,GAAGC,eAAe,GAAGpB;IACvD,MAAMqB,gBAAgBrB,QAAQ,YAAY,CAAC,OAAO,CAAC,MAAM;IACzD,IAAIqB,AAAuB,aAAvBA,cAAc,IAAI,EACpB,OAAOA,cAAc,QAAQ,CAACF,0BAA0BC;IAE1D,OAAOE,cAAcH,0BAA0BC;AACjD;AAEO,eAAeE,cACpBC,kBAA+B,EAC/BvB,OAAsB;IAEtB,MAAM,EAAEC,OAAO,EAAE,GAAGD;IACpB,MAAMwB,eAAexB,QAAQ,YAAY;IACzC,MAAM,EAAEyB,OAAO,EAAE,GAAGD;IACpBE,OACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmB1B,QAAQ,UAAU,CAAC,MAAM;IAElDyB,OAAOH,oBAAoB;IAC3B,MAAMK,yBAAyBlB,wBAAwBa;IACvD,MAAMM,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,4BACnBP,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAGzC,MAAMQ,aAAajC,QAAQ,YAAY,EAAE,SAAS;QAChD,aAAa2B;QACb,OAAO1B,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;IACjC;IACA,MAAMiC,gBAAgB,MAAMC,kBAAkB;QAC5C,aAAaF,WAAW,WAAW;QACnC,OAAOA,WAAW,KAAK;QACvB,QAAQA,WAAW,MAAM;QACzB,QAAQR,QAAQ,eAAe;IACjC;IAEA,MAAMW,eAAeF,cAAc,WAAW;IAE9C,MAAMpB,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKK;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMP;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAON,oBAAiC;QAC1C,MAAMc,SAAS,MAAMzB,mBAAmB;YACtC,QAAQW,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAT,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIC;IAGJ,IAAI;QACFA,MAAM,MAAMC,yBACVzB,MACAU,cACA;YACE,aAAaxB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOwC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAG1C;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,SAASA;gBACT,QAAQ;oBAAC,CAAC,eAAe,EAAE2C,cAAc;iBAAC;YAC5C;YACAG;YACAE;YACA,mBAAmBhD;QACrB;IACF;IAEA,MAAM8C,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIU;IACJ,IAAIC;IACJ,IAAIC,SACF,YAAYZ,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,MAAMa,gBAAgB1B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACjC,gBAAgB8C,IAAI,OAAO,EAAEa,cAAc,UAAU,CAAC,SAAS,GAClE,OAAO;QACL,MAAMrD;QACN,aAAa;YACX,SAASA;YACT,QAAQoD;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;IAGF,IAAI;QACF,MAAMc,UAAUpD,QAAQ,YAAY,EAAE;QACtC,MAAMqD,kBAAkBF,cAAc,mCAAmC,CACvEb,IAAI,OAAO,EACX;YACE,cAAcJ,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QAEFc,UAAUM,gBACRC,0CAA0CF,iBAAiBD;QAG7D/D,aAAa,WAAW2D;QAExB,MAAMQ,UAA+BC,sBACnCT,SACApB;QAEFsB,SAAS,EAAE;QAEX,IAAIM,SACFP,iBAAiBO;IAErB,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAahB,QACT,CAAC,+BAA+B,EAAEgB,EAAE,OAAO,EAAE,GAC7C;QACN,IAAI,AAACR,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAES,IAAI,CAAC,CAAC;aAFtBT,SAAS;YAACS;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,SAASC;YACT,QAAQC;QACV;QACAN;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAesB,gBAAgB5D,OAKrC;IAMC,MAAM,EAAEC,OAAO,EAAE4D,kBAAkB,EAAE,GAAG7D;IACxC,MAAMwB,eAAexB,QAAQ,YAAY;IACzC,MAAM,EAAEyB,OAAO,EAAE,GAAGD;IACpBE,OACED,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,EACnB;IAEF,MAAME,mBAAmB1B,QAAQ,UAAU,CAAC,MAAM;IAClD,MAAMiC,gBAAgB,MAAMC,kBAAkB;QAC5C,aAAaR;QACb,OAAO1B,QAAQ,QAAQ,CAAC,KAAK;QAC7B,QAAQA,QAAQ,QAAQ,CAAC,MAAM;QAC/B,QAAQwB,QAAQ,eAAe;IACjC;IAEA,MAAMM,eAAe+B,4BACnBrC,QAAQ,MAAM,CAAC,aAAa,CAAC,UAAU;IAEzC,MAAMsC,gCAAgCC,0BACpCtD,wBAAwBmD;IAE1B,MAAM/C,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG,cAAc,WAAW;wBAC9B,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM6B;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMxB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQiD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA/C,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAI4B;IAGJ,IAAI;QACFA,SAAS,MAAM1B,yBACbzB,MACAU,cACA;YACE,aAAaxB,QAAQ,WAAW;YAChC,kBAAkB;QACpB;IAEJ,EAAE,OAAOwC,WAAW;QAClB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMK,QACJN,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAG1C;QAChE,OAAO;YACL,kBAAkBA;YAClB,OAAO,CAAC,eAAe,EAAE2C,cAAc;YACvCG;YACAE;QACF;IACF;IAEA,IAAIoB;IAGJ,IAAIC,eAAeF,OAAO,OAAO,CAAC,KAAK;IACvC,MAAMd,gBAAgB1B,QAAQ,MAAM,CAAC,aAAa;IAClD,IAAI,CAACjC,gBAAgByE,OAAO,OAAO,EAAEd,cAAc,UAAU,CAAC,SAAS,GACrE,OAAO;QACL,kBAAkBrD;QAClB,OAAOqE;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;IAGF,IAAI;QACF,MAAMG,gBACJjB,cAAc,wCAAwC,CAACc,OAAO,OAAO,EAAE;YACrE,cAAc/B,cAAc,YAAY;YACxC,aAAaA,cAAc,WAAW;QACxC;QACF,MAAMmC,aAAaC,uBAAuB;YACxCF,cAAc,MAAM;eAChBA,cAAc,UAAU,IAAI,EAAE;SACnC;QACD7E,aAAa,iBAAiB8E;QAE9B,MAAME,eAAelE,iBAAiBgE,YAAYpE,QAAQ,QAAQ;QAClE,MAAMuE,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1ChF,aAAa,2BAA2BgF;QAExCL,mBAAmB,MAAMnE,sBAAsB;YAC7CE;YACA,UAAUoE;QACZ;QAEA9E,aACE,uDACAiF,eACAC,gBACAP,iBAAiB,KAAK,CAAC,KAAK,EAC5BA,iBAAiB,KAAK,CAAC,MAAM,EAC7BA,iBAAiB,OAAO,CAAC,KAAK;IAElC,EAAE,OAAOQ,OAAO;QACd,MAAMC,oBACJD,iBAAiBhC,QACb,CAAC,uCAAuC,EAAEgC,MAAM,OAAO,EAAE,GACzD;QACNP,eAAeA,eACX,GAAGA,aAAa,EAAE,EAAEQ,kBAAkB,CAAC,CAAC,GACxCA;IACN;IAEA,OAAO;QACLT;QACA,OAAOC;QACP,aAAapB,KAAK,SAAS,CAACkB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeW,qBAAwB5E,OAO7C;IACC,MAAM,EAAE6E,SAAS,EAAE5E,OAAO,EAAE6E,aAAa,EAAEjE,gBAAgB,EAAEW,YAAY,EAAE,GACzExB;IACF,MAAM+B,eAAegD,sBAAsB;QACzC,oBAAoBD,eAAe,uBAAuB;QAC1D,yBAAyB,CAAC,CAACjE,kBAAkB,QAAQ;IACvD;IACA,MAAMc,mBAAmB1B,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM+E,wBAAwBC,uBAC5BjF,QAAQ,eAAe,IAAI,IAC3B6E;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OAAO;QAC/CI,YAAY,IAAI,CAAC;YACf,MAAM;YACN,MAAM;QACR;QAEAA,YAAY,IAAI,CAAC;YACf,MAAM;YACN,WAAW;gBACT,KAAKvD;gBACL,QAAQ;YACV;QACF;IACF;IAEAuD,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMlE,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAASmD;QACX;KACD;IAED,IAAIrE,kBAAkB;QACpB,MAAMwB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM,EACJ,SAASO,WAAW,EACpBE,KAAK,EACLqC,iBAAiB,EAClB,GAAG,MAAMC,OAAOtE,MAAMU;IAEvB,IAAI6D;IACJ,IAAI;QACFA,cAAcC,2BAA8B1C;IAC9C,EAAE,OAAO2C,YAAY;QACnB,MAAM9C,eACJ8C,sBAAsB7C,QAAQ6C,WAAW,OAAO,GAAG5C,OAAO4C;QAC5D,MAAM,IAAI1C,qBACR,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAE;IAEJ;IAEA,OAAO;QACLuC;QACAzC;QACAE;QACAqC;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBjE,YAA0B;IAK1B,MAAMO,eAAe2D;IACrB,MAAMC,aAAaC,0BAA0BH;IAE7C,MAAM3E,OAAsB;QAC1B;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS4D;QACX;KACD;IAEDtG,aAAa,yCAAyCoG;IAEtD,MAAMxB,SAAS,MAAM1B,yBACnBzB,MACAU,cACA;QACE,kBAAkB;IACpB;IAGF,OAAO;QACL,kBAAkByC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -1,16 +1,17 @@
1
- import { paddingToMatchBlockByBase64 } from "@midscene/shared/img";
2
1
  import { getDebug } from "@midscene/shared/logger";
3
2
  import { assert } from "@midscene/shared/utils";
4
- import { buildYamlFlowFromPlans, fillBboxParam, findAllMidsceneLocatorField } from "../common.mjs";
3
+ import { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from "../common.mjs";
4
+ import { planningModelFamilyRequiredForLocateMessage } from "./errors.mjs";
5
5
  import { systemPromptToTaskPlanning } from "./prompt/llm-planning.mjs";
6
6
  import { extractXMLTag, parseMarkFinishedIndexes, parseSubGoalsFromXML } from "./prompt/util.mjs";
7
- import { AIResponseParseError, callAI, parseModelResponseJson } from "./service-caller/index.mjs";
7
+ import { AIResponseParseError, callAI } from "./service-caller/index.mjs";
8
+ import { prepareModelImage } from "./workflows/image-preprocess.mjs";
8
9
  const debug = getDebug('planning');
9
10
  const warnLog = getDebug('planning', {
10
11
  console: true
11
12
  });
12
13
  const noPreviousActionsText = 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';
13
- function parseXMLPlanningResponse(xmlString, modelFamily) {
14
+ function parseXMLPlanningResponse(xmlString, jsonParser) {
14
15
  const thought = extractXMLTag(xmlString, 'thought');
15
16
  const memory = extractXMLTag(xmlString, 'memory');
16
17
  const log = extractXMLTag(xmlString, 'log') || '';
@@ -34,11 +35,12 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
34
35
  const type = actionType.split('<')[0].trim();
35
36
  let param;
36
37
  if (actionParamStr) try {
37
- param = parseModelResponseJson(actionParamStr, modelFamily, 'input' === type.toLowerCase() ? {
38
- preserveStringValueKeys: [
38
+ param = jsonParser(actionParamStr, {
39
+ source: 'planning-action-param',
40
+ preserveStringValueKeys: 'input' === type.toLowerCase() ? [
39
41
  'value'
40
- ]
41
- } : void 0);
42
+ ] : void 0
43
+ });
42
44
  } catch (e) {
43
45
  throw new Error(`Failed to parse action-param-json: ${e}`);
44
46
  }
@@ -76,27 +78,28 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
76
78
  };
77
79
  }
78
80
  async function plan(userInstruction, opts) {
79
- const { context, modelConfig, conversationHistory } = opts;
81
+ const { context, conversationHistory } = opts;
82
+ const modelRuntime = opts.modelRuntime;
83
+ const { adapter } = modelRuntime;
80
84
  const { shotSize } = context;
81
85
  const screenshotBase64 = context.screenshot.base64;
82
- const { modelFamily } = modelConfig;
86
+ if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) throw new Error(planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot));
87
+ const locateResultAdapter = modelRuntime.config.modelFamily && 'standard' === adapter.locate.kind ? adapter.locate.resultAdapter : void 0;
83
88
  const includeSubGoals = true === opts.deepThink;
84
89
  const systemPrompt = await systemPromptToTaskPlanning({
85
90
  actionSpace: opts.actionSpace,
86
- modelFamily,
87
- includeBbox: opts.includeBbox,
91
+ locatePromptSpec: locateResultAdapter?.promptSpec,
92
+ includeLocateInPlanning: opts.includeLocateInPlanning,
88
93
  includeThought: true,
89
94
  includeSubGoals
90
95
  });
91
- let imagePayload = screenshotBase64;
92
- let imageWidth = shotSize.width;
93
- let imageHeight = shotSize.height;
94
- if ('qwen2.5-vl' === modelFamily) {
95
- const paddedResult = await paddingToMatchBlockByBase64(imagePayload);
96
- imageWidth = paddedResult.width;
97
- imageHeight = paddedResult.height;
98
- imagePayload = paddedResult.imageBase64;
99
- }
96
+ const preparedImage = await prepareModelImage({
97
+ imageBase64: screenshotBase64,
98
+ width: shotSize.width,
99
+ height: shotSize.height,
100
+ policy: adapter.imagePreprocess
101
+ });
102
+ const imagePayload = preparedImage.imageBase64;
100
103
  const actionContext = opts.actionContext ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\n` : '';
101
104
  const instruction = [
102
105
  {
@@ -159,23 +162,23 @@ async function plan(userInstruction, opts) {
159
162
  ...instruction,
160
163
  ...historyLog
161
164
  ];
162
- let { content: rawResponse, usage, reasoning_content } = await callAI(msgs, modelConfig, {
165
+ let { content: rawResponse, usage, reasoning_content } = await callAI(msgs, modelRuntime, {
163
166
  abortSignal: opts.abortSignal,
164
- forceOriginalImageDetail: 'gpt-5' === modelFamily && opts.includeBbox
167
+ requiresOriginalImageDetail: opts.includeLocateInPlanning
165
168
  });
166
169
  let planFromAI;
167
170
  try {
168
171
  try {
169
- planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
172
+ planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);
170
173
  } catch {
171
- const retry = await callAI(msgs, modelConfig, {
174
+ const retry = await callAI(msgs, modelRuntime, {
172
175
  abortSignal: opts.abortSignal,
173
- forceOriginalImageDetail: 'gpt-5' === modelFamily && opts.includeBbox
176
+ requiresOriginalImageDetail: opts.includeLocateInPlanning
174
177
  });
175
178
  rawResponse = retry.content;
176
179
  usage = retry.usage;
177
180
  reasoning_content = retry.reasoning_content;
178
- planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
181
+ planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);
179
182
  }
180
183
  if (planFromAI.action && void 0 !== planFromAI.finalizeSuccess) {
181
184
  warnLog('Planning response included both an action and <complete>; ignoring <complete> output.');
@@ -209,7 +212,22 @@ async function plan(userInstruction, opts) {
209
212
  debug('locateFields', locateFields);
210
213
  locateFields.forEach((field)=>{
211
214
  const locateResult = action.param[field];
212
- if (locateResult && void 0 !== modelFamily) action.param[field] = fillBboxParam(locateResult, imageWidth, imageHeight, modelFamily);
215
+ if (locateResult) {
216
+ if (!opts.includeLocateInPlanning) {
217
+ if ('object' == typeof locateResult) action.param[field] = {
218
+ prompt: locateResult.prompt
219
+ };
220
+ return;
221
+ }
222
+ assert(locateResultAdapter, 'generic planning locate normalization requires a standard locate adapter');
223
+ action.param[field] = {
224
+ ...locateResult,
225
+ locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(locateResult, {
226
+ preparedSize: preparedImage.preparedSize,
227
+ contentSize: preparedImage.contentSize
228
+ })
229
+ };
230
+ }
213
231
  });
214
232
  });
215
233
  if (includeSubGoals) {
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n parseModelResponseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = parseModelResponseJson(\n actionParamStr,\n modelFamily,\n type.toLowerCase() === 'input'\n ? { preserveStringValueKeys: ['value'] }\n : undefined,\n );\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n // Controls aiAct planning prompt shape and state updates, such as sub-goals.\n deepThink?: boolean;\n abortSignal?: AbortSignal;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = shotSize.width;\n let imageHeight = shotSize.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n // When GPT-5 planning includes bbox, the planning call also performs\n // localization, so the screenshot should be sent with original detail.\n forceOriginalImageDetail: modelFamily === 'gpt-5' && opts.includeBbox,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n } catch {\n const retry = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n forceOriginalImageDetail: modelFamily === 'gpt-5' && opts.includeBbox,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","parseModelResponseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","shotSize","screenshotBase64","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;AA8BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,cAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,cAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,qBAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,yBAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQC,uBACNf,gBACAP,aACAoB,AAAuB,YAAvBA,KAAK,WAAW,KACZ;gBAAE,yBAAyB;oBAAC;iBAAQ;YAAC,IACrCR;QAER,EAAE,OAAOW,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAYC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,QAAQ,EAAE,GAAGH;IACrB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAE5B,WAAW,EAAE,GAAG6B;IAGxB,MAAMI,kBAAkBN,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMO,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7B3B;QACA,aAAa2B,KAAK,WAAW;QAC7B,gBAAgB;QAChBM;IACF;IAEA,IAAIG,eAAeJ;IACnB,IAAIK,aAAaN,SAAS,KAAK;IAC/B,IAAIO,cAAcP,SAAS,MAAM;IAKjC,IAAI/B,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMuC,eAAe,MAAMC,4BAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBd,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMe,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEf,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIiB;IAKJ,MAAMC,wBAAwBX,kBAC1BH,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMe,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9Bd,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAEjC,uBAAuB;IAGpC,MAAMiD,eAAehB,oBAAoB,cAAc;IACvD,MAAMiB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIhB,oBAAoB,sBAAsB,EAAE;QAC9Ca,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGb,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEiB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEa,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACa;IAG3Bb,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMkB,aAAalB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMsB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMpB,aAAa;QAClC,aAAaF,KAAK,WAAW;QAG7B,0BAA0B3B,AAAgB,YAAhBA,eAA2B2B,KAAK,WAAW;IACvE;IAGA,IAAI2B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAalD;QACrD,EAAE,OAAM;YACN,MAAMuD,QAAQ,MAAMF,OAAOJ,MAAMpB,aAAa;gBAC5C,aAAaF,KAAK,WAAW;gBAE7B,0BAA0B3B,AAAgB,YAAhBA,eAA2B2B,KAAK,WAAW;YACvE;YACAuB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAalD;QACrD;QAEA,IAAIsD,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAIxB,iBACFH,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM4B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,uBAAuBH,SAAS7B,KAAK,WAAW;YAC1D8B;QACF;QAEAG,OAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBlC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,gBAAgBjE,AAAgBY,WAAhBZ,aAElBmB,OAAO,KAAK,CAAC6C,MAAM,GAAGE,cACpBD,cACA5B,YACAC,aACAtC;YAGN;QACF;QAGA,IAAIiC,iBAAiB;YACnB,IAAIqB,WAAW,cAAc,EAAE,QAC7BxB,oBAAoB,aAAa,CAACwB,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMa,SAASb,WAAW,mBAAmB,CAChDxB,oBAAoB,mBAAmB,CAACqC;YAI5C,IAAIb,WAAW,GAAG,EAChBxB,oBAAoB,gBAAgB,CAACwB,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChBxB,oBAAoB,mBAAmB,CAACwB,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnBxB,oBAAoB,YAAY,CAACwB,WAAW,MAAM;QAGpDxB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMoB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOU,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClCnB,aACAC;IAEJ;AACF"}
1
+ {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;;AAoBA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,cAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,cAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,qBAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,yBAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAAuB,EACvBC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,4CAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,2BAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,kBAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,gBAAgBf,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMgB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEhB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIkB;IAKJ,MAAMC,wBAAwBT,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMiB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BhB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMiD,eAAelB,oBAAoB,cAAc;IACvD,MAAMmB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIlB,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEmB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKL;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACe;IAG3Bf,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMoB,aAAapB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAMuB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCM;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMpB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI4B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAMyB,QAAQ,MAAMF,OAAOJ,MAAMpB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACAwB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE;QAEA,IAAIwB,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAItB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM8B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,uBAAuBH,SAAS9B,KAAK,WAAW;YAC1D+B;QACF;QAEAG,OAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAACvC,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAOuC,cAET9C,OAAO,KAAK,CAAC6C,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,OACE1B,qBACA;oBAEFf,OAAO,KAAK,CAAC6C,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkB/B,oBAAoB,6BAA6B,CACjE+B,cACA;4BACE,cAAc3B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAImB,WAAW,cAAc,EAAE,QAC7B1B,oBAAoB,aAAa,CAAC0B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD1B,oBAAoB,mBAAmB,CAACsC;YAI5C,IAAIZ,WAAW,GAAG,EAChB1B,oBAAoB,gBAAgB,CAAC0B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB1B,oBAAoB,mBAAmB,CAAC0B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB1B,oBAAoB,YAAY,CAAC0B,WAAW,MAAM;QAGpD1B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMsB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
@@ -1,10 +1,24 @@
1
- import { adaptBbox, pointToBbox } from "../../common.mjs";
2
1
  import { getDebug } from "@midscene/shared/logger";
2
+ import { finalizePixelBbox } from "../../shared/model-locate-result/bbox.mjs";
3
+ import { mapLocateResultToPixelBboxByCoordinates } from "../../shared/model-locate-result/pixel-bbox-mapper.mjs";
3
4
  const debug = getDebug('auto-glm-actions');
4
5
  const AUTO_GLM_COORDINATE_MAX = 1000;
5
- function autoGLMCoordinateToBbox(x, y, width, height) {
6
- const bbox = pointToBbox(x, y, 10);
7
- return adaptBbox(bbox, width, height, 'auto-glm');
6
+ function autoGLMCoordinateToLocateParam(coordinate, size) {
7
+ const ctx = {
8
+ preparedSize: size
9
+ };
10
+ const pixelBbox = mapLocateResultToPixelBboxByCoordinates({
11
+ type: 'point',
12
+ coordinates: coordinate
13
+ }, ctx, {
14
+ shape: 'point',
15
+ order: 'xy',
16
+ normalizedBy: AUTO_GLM_COORDINATE_MAX
17
+ });
18
+ return {
19
+ prompt: '',
20
+ locatedPixelBbox: finalizePixelBbox(pixelBbox, coordinate, ctx)
21
+ };
8
22
  }
9
23
  const BACK_BUTTON_NAMES = [
10
24
  'AndroidBackButton',
@@ -42,16 +56,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
42
56
  {
43
57
  const tapAction = doAction;
44
58
  debug('Transform Tap action:', tapAction);
45
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(tapAction.element[0], tapAction.element[1], size.width, size.height);
46
- const locate = {
47
- prompt: '',
48
- bbox: [
49
- x1,
50
- y1,
51
- x2,
52
- y2
53
- ]
54
- };
59
+ const locate = autoGLMCoordinateToLocateParam(tapAction.element, size);
55
60
  return [
56
61
  {
57
62
  type: 'Tap',
@@ -65,16 +70,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
65
70
  {
66
71
  const doubleTapAction = doAction;
67
72
  debug('Transform Double Tap action:', doubleTapAction);
68
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(doubleTapAction.element[0], doubleTapAction.element[1], size.width, size.height);
69
- const locate = {
70
- prompt: '',
71
- bbox: [
72
- x1,
73
- y1,
74
- x2,
75
- y2
76
- ]
77
- };
73
+ const locate = autoGLMCoordinateToLocateParam(doubleTapAction.element, size);
78
74
  return [
79
75
  {
80
76
  type: 'DoubleClick',
@@ -101,16 +97,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
101
97
  {
102
98
  const swipeAction = doAction;
103
99
  debug('Transform Swipe action:', swipeAction);
104
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(swipeAction.start[0], swipeAction.start[1], size.width, size.height);
105
- const locate = {
106
- prompt: '',
107
- bbox: [
108
- x1,
109
- y1,
110
- x2,
111
- y2
112
- ]
113
- };
100
+ const locate = autoGLMCoordinateToLocateParam(swipeAction.start, size);
114
101
  const deltaX = swipeAction.end[0] - swipeAction.start[0];
115
102
  const deltaY = swipeAction.end[1] - swipeAction.start[1];
116
103
  let direction;
@@ -141,16 +128,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
141
128
  {
142
129
  const longPressAction = doAction;
143
130
  debug('Transform Long Press action:', longPressAction);
144
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(longPressAction.element[0], longPressAction.element[1], size.width, size.height);
145
- const locate = {
146
- prompt: '',
147
- bbox: [
148
- x1,
149
- y1,
150
- x2,
151
- y2
152
- ]
153
- };
131
+ const locate = autoGLMCoordinateToLocateParam(longPressAction.element, size);
154
132
  return [
155
133
  {
156
134
  type: 'LongPress',
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/auto-glm/actions.mjs","sources":["../../../../../src/ai-model/models/auto-glm/actions.ts"],"sourcesContent":["import type { DeviceAction } from '@/device';\nimport type {\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\n\nconst debug = getDebug('auto-glm-actions');\n\n/**\n * Auto-GLM coordinate system range: [0, AUTO_GLM_COORDINATE_MAX]\n */\nconst AUTO_GLM_COORDINATE_MAX = 1000;\n\nfunction autoGLMCoordinateToLocateParam(\n coordinate: [number, number],\n size: { width: number; height: number },\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: coordinate },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: AUTO_GLM_COORDINATE_MAX },\n );\n\n return {\n prompt: '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, coordinate, ctx),\n };\n}\n\nexport interface BaseAction {\n _metadata: string;\n think?: string;\n}\n\nexport interface TapAction extends BaseAction {\n _metadata: 'do';\n action: 'Tap';\n element: [number, number];\n}\n\nexport interface DoubleTapAction extends BaseAction {\n _metadata: 'do';\n action: 'Double Tap';\n element: [number, number];\n}\n\nexport interface TypeAction extends BaseAction {\n _metadata: 'do';\n action: 'Type';\n text: string;\n}\n\nexport interface SwipeAction extends BaseAction {\n _metadata: 'do';\n action: 'Swipe';\n start: [number, number];\n end: [number, number];\n}\n\nexport interface LongPressAction extends BaseAction {\n _metadata: 'do';\n action: 'Long Press';\n element: [number, number];\n}\n\nexport interface LaunchAction extends BaseAction {\n _metadata: 'do';\n action: 'Launch';\n app: string;\n}\n\nexport interface BackAction extends BaseAction {\n _metadata: 'do';\n action: 'Back';\n}\n\nexport interface HomeAction extends BaseAction {\n _metadata: 'do';\n action: 'Home';\n}\n\nexport interface WaitAction extends BaseAction {\n _metadata: 'do';\n action: 'Wait';\n durationMs: number;\n}\n\nexport interface InteractAction extends BaseAction {\n _metadata: 'do';\n action: 'Interact';\n}\n\nexport interface CallAPIAction extends BaseAction {\n _metadata: 'do';\n action: 'Call_API';\n instruction: string;\n}\n\nexport interface TakeoverAction extends BaseAction {\n _metadata: 'do';\n action: 'Take_over';\n message: string;\n}\n\nexport interface NoteAction extends BaseAction {\n _metadata: 'do';\n action: 'Note';\n message: string;\n}\n\nexport interface FinishAction extends BaseAction {\n _metadata: 'finish';\n message: string;\n}\n\nexport type ParsedAction =\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction\n | FinishAction;\n\nconst BACK_BUTTON_NAMES = ['AndroidBackButton', 'HarmonyBackButton'];\nconst HOME_BUTTON_NAMES = ['AndroidHomeButton', 'HarmonyHomeButton'];\n\n/**\n * Find the action name in actionSpace that matches one of the known names.\n * Falls back to defaultName if no match found or actionSpace is not provided.\n */\nfunction findActionName(\n actionSpace: DeviceAction[] | undefined,\n knownNames: string[],\n defaultName: string,\n): string {\n if (!actionSpace) return defaultName;\n const match = actionSpace.find((a) => knownNames.includes(a.name));\n return match ? match.name : defaultName;\n}\n\nexport function transformAutoGLMAction(\n action: ParsedAction,\n size: { width: number; height: number },\n actionSpace?: DeviceAction[],\n): PlanningAction[] {\n try {\n switch (action._metadata) {\n case 'finish': {\n const finishAction = action as FinishAction;\n debug('Transform finish action:', finishAction);\n return [\n {\n type: 'Finished',\n param: {},\n thought: finishAction.message,\n },\n ];\n }\n case 'do': {\n const doAction = action as\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction;\n\n switch ((doAction as any).action) {\n case 'Tap': {\n const tapAction = doAction as TapAction;\n debug('Transform Tap action:', tapAction);\n const locate = autoGLMCoordinateToLocateParam(\n tapAction.element,\n size,\n );\n\n return [\n {\n type: 'Tap',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Double Tap': {\n const doubleTapAction = doAction as DoubleTapAction;\n debug('Transform Double Tap action:', doubleTapAction);\n const locate = autoGLMCoordinateToLocateParam(\n doubleTapAction.element,\n size,\n );\n\n return [\n {\n type: 'DoubleClick',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Type': {\n const typeAction = doAction as TypeAction;\n debug('Transform Type action:', typeAction);\n\n return [\n {\n type: 'Input',\n param: {\n value: typeAction.text,\n },\n },\n ];\n }\n case 'Swipe': {\n const swipeAction = doAction as SwipeAction;\n debug('Transform Swipe action:', swipeAction);\n\n const locate = autoGLMCoordinateToLocateParam(\n swipeAction.start,\n size,\n );\n\n // Calculate horizontal and vertical delta in [0,AUTO_GLM_COORDINATE_MAX] coordinate system\n const deltaX = swipeAction.end[0] - swipeAction.start[0];\n const deltaY = swipeAction.end[1] - swipeAction.start[1];\n\n // Determine direction and distance\n let direction: 'up' | 'down' | 'left' | 'right';\n let distance: number;\n\n const absDeltaX = Math.abs(deltaX);\n const absDeltaY = Math.abs(deltaY);\n\n if (absDeltaY > absDeltaX) {\n // Vertical scroll\n distance = Math.round(\n (absDeltaY * size.height) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaY > 0 ? 'up' : 'down';\n } else {\n // Horizontal scroll\n distance = Math.round(\n (absDeltaX * size.width) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaX > 0 ? 'left' : 'right';\n }\n\n debug(\n `Calculate swipe direction: ${direction}, distance: ${distance}`,\n );\n\n return [\n {\n type: 'Scroll',\n param: {\n locate,\n // The scrolling direction here all refers to which direction of the page's content will appear on the screen.\n distance,\n direction,\n },\n thought: swipeAction.think || '',\n },\n ];\n }\n case 'Long Press': {\n const longPressAction = doAction as LongPressAction;\n debug('Transform Long Press action:', longPressAction);\n const locate = autoGLMCoordinateToLocateParam(\n longPressAction.element,\n size,\n );\n\n return [\n {\n type: 'LongPress',\n param: {\n locate,\n },\n thought: longPressAction.think || '',\n },\n ];\n }\n case 'Back': {\n const backAction = doAction as BackAction;\n debug('Transform Back action:', backAction);\n return [\n {\n type: findActionName(\n actionSpace,\n BACK_BUTTON_NAMES,\n 'AndroidBackButton',\n ),\n param: {},\n thought: backAction.think || '',\n },\n ];\n }\n case 'Home': {\n const homeAction = doAction as HomeAction;\n debug('Transform Home action:', homeAction);\n return [\n {\n type: findActionName(\n actionSpace,\n HOME_BUTTON_NAMES,\n 'AndroidHomeButton',\n ),\n param: {},\n thought: homeAction.think || '',\n },\n ];\n }\n case 'Wait': {\n const waitAction = doAction as WaitAction;\n debug('Transform Wait action:', waitAction);\n return [\n {\n type: 'Sleep',\n param: {\n timeMs: waitAction.durationMs,\n },\n thought: waitAction.think || '',\n },\n ];\n }\n case 'Launch': {\n const launchAction = doAction as LaunchAction;\n debug('Transform Launch action:', launchAction);\n return [\n {\n type: 'Launch',\n param: { uri: launchAction.app },\n thought: launchAction.think || '',\n },\n ];\n }\n case 'Interact': {\n throw new Error(\n `Action \"Interact\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Call_API': {\n throw new Error(\n `Action \"Call_API\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Take_over': {\n throw new Error(\n `Action \"Take_over\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Note': {\n throw new Error(\n `Action \"Note\" from auto-glm is not supported in the current implementation.`,\n );\n }\n default:\n throw new Error(\n `Unknown do() action type: ${(doAction as any).action}`,\n );\n }\n }\n default:\n throw new Error(\n `Unknown action metadata: ${(action as any)._metadata}`,\n );\n }\n } catch (error) {\n const errorMessage = error instanceof Error ? error.message : String(error);\n debug('Transform error:', errorMessage);\n throw new Error(`Failed to transform action: ${errorMessage}`);\n }\n}\n"],"names":["debug","getDebug","AUTO_GLM_COORDINATE_MAX","autoGLMCoordinateToLocateParam","coordinate","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","BACK_BUTTON_NAMES","HOME_BUTTON_NAMES","findActionName","actionSpace","knownNames","defaultName","match","a","transformAutoGLMAction","action","finishAction","doAction","tapAction","locate","doubleTapAction","typeAction","swipeAction","deltaX","deltaY","direction","distance","absDeltaX","Math","absDeltaY","longPressAction","backAction","homeAction","waitAction","launchAction","Error","error","errorMessage","String"],"mappings":";;;AASA,MAAMA,QAAQC,SAAS;AAKvB,MAAMC,0BAA0B;AAEhC,SAASC,+BACPC,UAA4B,EAC5BC,IAAuC;IAEvC,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,wCAChB;QAAE,MAAM;QAAS,aAAaJ;IAAW,GACzCE,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAcJ;IAAwB;IAGvE,OAAO;QACL,QAAQ;QACR,kBAAkBO,kBAAkBF,WAAWH,YAAYE;IAC7D;AACF;AAwGA,MAAMI,oBAAoB;IAAC;IAAqB;CAAoB;AACpE,MAAMC,oBAAoB;IAAC;IAAqB;CAAoB;AAMpE,SAASC,eACPC,WAAuC,EACvCC,UAAoB,EACpBC,WAAmB;IAEnB,IAAI,CAACF,aAAa,OAAOE;IACzB,MAAMC,QAAQH,YAAY,IAAI,CAAC,CAACI,IAAMH,WAAW,QAAQ,CAACG,EAAE,IAAI;IAChE,OAAOD,QAAQA,MAAM,IAAI,GAAGD;AAC9B;AAEO,SAASG,uBACdC,MAAoB,EACpBd,IAAuC,EACvCQ,WAA4B;IAE5B,IAAI;QACF,OAAQM,OAAO,SAAS;YACtB,KAAK;gBAAU;oBACb,MAAMC,eAAeD;oBACrBnB,MAAM,4BAA4BoB;oBAClC,OAAO;wBACL;4BACE,MAAM;4BACN,OAAO,CAAC;4BACR,SAASA,aAAa,OAAO;wBAC/B;qBACD;gBACH;YACA,KAAK;gBAAM;oBACT,MAAMC,WAAWF;oBAejB,OAASE,SAAiB,MAAM;wBAC9B,KAAK;4BAAO;gCACV,MAAMC,YAAYD;gCAClBrB,MAAM,yBAAyBsB;gCAC/B,MAAMC,SAASpB,+BACbmB,UAAU,OAAO,EACjBjB;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMC,kBAAkBH;gCACxBrB,MAAM,gCAAgCwB;gCACtC,MAAMD,SAASpB,+BACbqB,gBAAgB,OAAO,EACvBnB;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAME,aAAaJ;gCACnBrB,MAAM,0BAA0ByB;gCAEhC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,OAAOA,WAAW,IAAI;wCACxB;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAS;gCACZ,MAAMC,cAAcL;gCACpBrB,MAAM,2BAA2B0B;gCAEjC,MAAMH,SAASpB,+BACbuB,YAAY,KAAK,EACjBrB;gCAIF,MAAMsB,SAASD,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCACxD,MAAME,SAASF,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCAGxD,IAAIG;gCACJ,IAAIC;gCAEJ,MAAMC,YAAYC,KAAK,GAAG,CAACL;gCAC3B,MAAMM,YAAYD,KAAK,GAAG,CAACJ;gCAE3B,IAAIK,YAAYF,WAAW;oCAEzBD,WAAWE,KAAK,KAAK,CAClBC,YAAY5B,KAAK,MAAM,GAAIH;oCAE9B2B,YAAYD,SAAS,IAAI,OAAO;gCAClC,OAAO;oCAELE,WAAWE,KAAK,KAAK,CAClBD,YAAY1B,KAAK,KAAK,GAAIH;oCAE7B2B,YAAYF,SAAS,IAAI,SAAS;gCACpC;gCAEA3B,MACE,CAAC,2BAA2B,EAAE6B,UAAU,YAAY,EAAEC,UAAU;gCAGlE,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLP;4CAEAO;4CACAD;wCACF;wCACA,SAASH,YAAY,KAAK,IAAI;oCAChC;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMQ,kBAAkBb;gCACxBrB,MAAM,gCAAgCkC;gCACtC,MAAMX,SAASpB,+BACb+B,gBAAgB,OAAO,EACvB7B;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;wCACA,SAASW,gBAAgB,KAAK,IAAI;oCACpC;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAad;gCACnBrB,MAAM,0BAA0BmC;gCAChC,OAAO;oCACL;wCACE,MAAMvB,eACJC,aACAH,mBACA;wCAEF,OAAO,CAAC;wCACR,SAASyB,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAaf;gCACnBrB,MAAM,0BAA0BoC;gCAChC,OAAO;oCACL;wCACE,MAAMxB,eACJC,aACAF,mBACA;wCAEF,OAAO,CAAC;wCACR,SAASyB,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAahB;gCACnBrB,MAAM,0BAA0BqC;gCAChC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,QAAQA,WAAW,UAAU;wCAC/B;wCACA,SAASA,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAU;gCACb,MAAMC,eAAejB;gCACrBrB,MAAM,4BAA4BsC;gCAClC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CAAE,KAAKA,aAAa,GAAG;wCAAC;wCAC/B,SAASA,aAAa,KAAK,IAAI;oCACjC;iCACD;4BACH;wBACA,KAAK;4BACH,MAAM,IAAIC,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ;4BACE,MAAM,IAAIA,MACR,CAAC,0BAA0B,EAAGlB,SAAiB,MAAM,EAAE;oBAE7D;gBACF;YACA;gBACE,MAAM,IAAIkB,MACR,CAAC,yBAAyB,EAAGpB,OAAe,SAAS,EAAE;QAE7D;IACF,EAAE,OAAOqB,OAAO;QACd,MAAMC,eAAeD,iBAAiBD,QAAQC,MAAM,OAAO,GAAGE,OAAOF;QACrExC,MAAM,oBAAoByC;QAC1B,MAAM,IAAIF,MAAM,CAAC,4BAA4B,EAAEE,cAAc;IAC/D;AACF"}
@@ -0,0 +1,45 @@
1
+ import { autoGlmLocate } from "./locate.mjs";
2
+ import { autoGlmPlanning } from "./planning.mjs";
3
+ import { getAutoGLMChineseLocatePrompt, getAutoGLMChinesePlanPrompt, getAutoGLMMultilingualLocatePrompt, getAutoGLMMultilingualPlanPrompt } from "./prompt.mjs";
4
+ const defaultAutoGlmReplanningCycleLimit = 100;
5
+ function createAutoGlmAdapter({ getPlanPrompt, getLocatePrompt }) {
6
+ return {
7
+ chatCompletion: {
8
+ unsupportedUserConfig: [
9
+ 'reasoningEnabled',
10
+ 'reasoningEffort',
11
+ 'reasoningBudget'
12
+ ],
13
+ buildChatCompletionParams: ({ midsceneDefaults, userConfig })=>({
14
+ config: {
15
+ temperature: userConfig.temperature ?? midsceneDefaults.temperature,
16
+ top_p: 0.85,
17
+ frequency_penalty: 0.2
18
+ }
19
+ })
20
+ },
21
+ planning: {
22
+ kind: 'custom',
23
+ cacheEnabled: false,
24
+ defaultReplanningCycleLimit: defaultAutoGlmReplanningCycleLimit,
25
+ planFn: (userInstruction, options)=>autoGlmPlanning(userInstruction, options, getPlanPrompt)
26
+ },
27
+ locate: {
28
+ kind: 'custom',
29
+ locateFn: (elementDescription, options)=>autoGlmLocate(elementDescription, options, getLocatePrompt)
30
+ }
31
+ };
32
+ }
33
+ const autoGlmAdapters = {
34
+ 'auto-glm': createAutoGlmAdapter({
35
+ getPlanPrompt: getAutoGLMChinesePlanPrompt,
36
+ getLocatePrompt: getAutoGLMChineseLocatePrompt
37
+ }),
38
+ 'auto-glm-multilingual': createAutoGlmAdapter({
39
+ getPlanPrompt: getAutoGLMMultilingualPlanPrompt,
40
+ getLocatePrompt: getAutoGLMMultilingualLocatePrompt
41
+ })
42
+ };
43
+ export { autoGlmAdapters };
44
+
45
+ //# sourceMappingURL=adapter.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/auto-glm/adapter.mjs","sources":["../../../../../src/ai-model/models/auto-glm/adapter.ts"],"sourcesContent":["import type { TModelFamily } from '@midscene/shared/env';\nimport type { ModelAdapterDefinition } from '../types';\nimport { autoGlmLocate } from './locate';\nimport { autoGlmPlanning } from './planning';\nimport {\n getAutoGLMChineseLocatePrompt,\n getAutoGLMChinesePlanPrompt,\n getAutoGLMMultilingualLocatePrompt,\n getAutoGLMMultilingualPlanPrompt,\n} from './prompt';\n\nconst defaultAutoGlmReplanningCycleLimit = 100;\n\nfunction createAutoGlmAdapter({\n getPlanPrompt,\n getLocatePrompt,\n}: {\n getPlanPrompt: () => string;\n getLocatePrompt: () => string;\n}): ModelAdapterDefinition {\n return {\n chatCompletion: {\n unsupportedUserConfig: [\n 'reasoningEnabled',\n 'reasoningEffort',\n 'reasoningBudget',\n ],\n buildChatCompletionParams: ({ midsceneDefaults, userConfig }) => ({\n config: {\n temperature: userConfig.temperature ?? midsceneDefaults.temperature,\n top_p: 0.85,\n frequency_penalty: 0.2,\n },\n }),\n },\n planning: {\n kind: 'custom',\n cacheEnabled: false,\n defaultReplanningCycleLimit: defaultAutoGlmReplanningCycleLimit,\n planFn: (userInstruction, options) =>\n autoGlmPlanning(userInstruction, options, getPlanPrompt),\n },\n locate: {\n kind: 'custom',\n locateFn: (elementDescription, options) =>\n autoGlmLocate(elementDescription, options, getLocatePrompt),\n },\n };\n}\n\nexport const autoGlmAdapters = {\n 'auto-glm': createAutoGlmAdapter({\n getPlanPrompt: getAutoGLMChinesePlanPrompt,\n getLocatePrompt: getAutoGLMChineseLocatePrompt,\n }),\n 'auto-glm-multilingual': createAutoGlmAdapter({\n getPlanPrompt: getAutoGLMMultilingualPlanPrompt,\n getLocatePrompt: getAutoGLMMultilingualLocatePrompt,\n }),\n} satisfies Pick<\n Record<TModelFamily, ModelAdapterDefinition>,\n 'auto-glm' | 'auto-glm-multilingual'\n>;\n"],"names":["defaultAutoGlmReplanningCycleLimit","createAutoGlmAdapter","getPlanPrompt","getLocatePrompt","midsceneDefaults","userConfig","userInstruction","options","autoGlmPlanning","elementDescription","autoGlmLocate","autoGlmAdapters","getAutoGLMChinesePlanPrompt","getAutoGLMChineseLocatePrompt","getAutoGLMMultilingualPlanPrompt","getAutoGLMMultilingualLocatePrompt"],"mappings":";;;AAWA,MAAMA,qCAAqC;AAE3C,SAASC,qBAAqB,EAC5BC,aAAa,EACbC,eAAe,EAIhB;IACC,OAAO;QACL,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;aACD;YACD,2BAA2B,CAAC,EAAEC,gBAAgB,EAAEC,UAAU,EAAE,GAAM;oBAChE,QAAQ;wBACN,aAAaA,WAAW,WAAW,IAAID,iBAAiB,WAAW;wBACnE,OAAO;wBACP,mBAAmB;oBACrB;gBACF;QACF;QACA,UAAU;YACR,MAAM;YACN,cAAc;YACd,6BAA6BJ;YAC7B,QAAQ,CAACM,iBAAiBC,UACxBC,gBAAgBF,iBAAiBC,SAASL;QAC9C;QACA,QAAQ;YACN,MAAM;YACN,UAAU,CAACO,oBAAoBF,UAC7BG,cAAcD,oBAAoBF,SAASJ;QAC/C;IACF;AACF;AAEO,MAAMQ,kBAAkB;IAC7B,YAAYV,qBAAqB;QAC/B,eAAeW;QACf,iBAAiBC;IACnB;IACA,yBAAyBZ,qBAAqB;QAC5C,eAAea;QACf,iBAAiBC;IACnB;AACF"}