@midscene/core 1.8.11 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304) hide show
  1. package/dist/es/agent/agent.mjs +40 -50
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +39 -19
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +24 -22
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +11 -14
  8. package/dist/es/agent/utils.mjs.map +1 -1
  9. package/dist/es/ai-model/connectivity.mjs +7 -3
  10. package/dist/es/ai-model/connectivity.mjs.map +1 -1
  11. package/dist/es/ai-model/errors.mjs +9 -0
  12. package/dist/es/ai-model/errors.mjs.map +1 -0
  13. package/dist/es/ai-model/index.mjs +3 -4
  14. package/dist/es/ai-model/inspect.mjs +132 -144
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +46 -28
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/{auto-glm → models/auto-glm}/actions.mjs +22 -44
  19. package/dist/es/ai-model/models/auto-glm/actions.mjs.map +1 -0
  20. package/dist/es/ai-model/models/auto-glm/adapter.mjs +45 -0
  21. package/dist/es/ai-model/models/auto-glm/adapter.mjs.map +1 -0
  22. package/dist/es/ai-model/models/auto-glm/locate.mjs +112 -0
  23. package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -0
  24. package/dist/es/ai-model/models/auto-glm/parser.mjs.map +1 -0
  25. package/dist/es/ai-model/{auto-glm → models/auto-glm}/planning.mjs +6 -7
  26. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -0
  27. package/dist/es/ai-model/{auto-glm → models/auto-glm}/prompt.mjs +3 -11
  28. package/dist/es/ai-model/models/auto-glm/prompt.mjs.map +1 -0
  29. package/dist/es/ai-model/models/default.mjs +12 -0
  30. package/dist/es/ai-model/models/default.mjs.map +1 -0
  31. package/dist/es/ai-model/models/doubao.mjs +138 -0
  32. package/dist/es/ai-model/models/doubao.mjs.map +1 -0
  33. package/dist/es/ai-model/models/gemini.mjs +34 -0
  34. package/dist/es/ai-model/models/gemini.mjs.map +1 -0
  35. package/dist/es/ai-model/models/glm.mjs +37 -0
  36. package/dist/es/ai-model/models/glm.mjs.map +1 -0
  37. package/dist/es/ai-model/models/gpt.mjs +31 -0
  38. package/dist/es/ai-model/models/gpt.mjs.map +1 -0
  39. package/dist/es/ai-model/models/index.mjs +2 -0
  40. package/dist/es/ai-model/models/qwen.mjs +113 -0
  41. package/dist/es/ai-model/models/qwen.mjs.map +1 -0
  42. package/dist/es/ai-model/models/registry.mjs +45 -0
  43. package/dist/es/ai-model/models/registry.mjs.map +1 -0
  44. package/dist/es/ai-model/models/resolved.mjs +104 -0
  45. package/dist/es/ai-model/models/resolved.mjs.map +1 -0
  46. package/dist/es/ai-model/models/types.mjs +0 -0
  47. package/dist/es/ai-model/models/ui-tars/adapter.mjs +142 -0
  48. package/dist/es/ai-model/models/ui-tars/adapter.mjs.map +1 -0
  49. package/dist/es/ai-model/{ui-tars-planning.mjs → models/ui-tars/planning.mjs} +44 -62
  50. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/extraction.mjs +3 -3
  52. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  53. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -11
  54. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  55. package/dist/es/ai-model/prompt/llm-planning.mjs +25 -60
  56. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  57. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -10
  58. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  59. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs +9 -0
  60. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs.map +1 -0
  61. package/dist/es/ai-model/prompt/locate-param-example.mjs +15 -0
  62. package/dist/es/ai-model/prompt/locate-param-example.mjs.map +1 -0
  63. package/dist/es/ai-model/prompt/playwright-generator.mjs +5 -5
  64. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  65. package/dist/es/ai-model/prompt/yaml-generator.mjs +5 -5
  66. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  67. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs +107 -0
  68. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs.map +1 -0
  69. package/dist/es/ai-model/service-caller/index.mjs +59 -190
  70. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  71. package/dist/es/ai-model/service-caller/json.mjs +60 -0
  72. package/dist/es/ai-model/service-caller/json.mjs.map +1 -0
  73. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs +68 -0
  74. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs.map +1 -0
  75. package/dist/es/ai-model/shared/model-locate-result/factory.mjs +96 -0
  76. package/dist/es/ai-model/shared/model-locate-result/factory.mjs.map +1 -0
  77. package/dist/es/ai-model/shared/model-locate-result/index.mjs +3 -0
  78. package/dist/es/ai-model/shared/model-locate-result/parse.mjs +41 -0
  79. package/dist/es/ai-model/shared/model-locate-result/parse.mjs.map +1 -0
  80. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs +64 -0
  81. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs.map +1 -0
  82. package/dist/es/ai-model/shared/model-locate-result/types.mjs +0 -0
  83. package/dist/es/ai-model/types.mjs +0 -0
  84. package/dist/es/ai-model/workflows/image-preprocess.mjs +27 -0
  85. package/dist/es/ai-model/workflows/image-preprocess.mjs.map +1 -0
  86. package/dist/es/ai-model/workflows/inspect/index.mjs +2 -0
  87. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs +23 -0
  88. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs.map +1 -0
  89. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs +18 -0
  90. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs.map +1 -0
  91. package/dist/es/ai-model/workflows/inspect/types.mjs +0 -0
  92. package/dist/es/ai-model/workflows/planning/index.mjs +5 -0
  93. package/dist/es/ai-model/workflows/planning/index.mjs.map +1 -0
  94. package/dist/es/ai-model/workflows/planning/types.mjs +0 -0
  95. package/dist/es/common.mjs +2 -174
  96. package/dist/es/common.mjs.map +1 -1
  97. package/dist/es/device/index.mjs.map +1 -1
  98. package/dist/es/service/index.mjs +96 -69
  99. package/dist/es/service/index.mjs.map +1 -1
  100. package/dist/es/types.mjs.map +1 -1
  101. package/dist/es/utils.mjs +2 -2
  102. package/dist/es/yaml/player.mjs +4 -3
  103. package/dist/es/yaml/player.mjs.map +1 -1
  104. package/dist/lib/agent/agent.js +43 -53
  105. package/dist/lib/agent/agent.js.map +1 -1
  106. package/dist/lib/agent/task-builder.js +38 -18
  107. package/dist/lib/agent/task-builder.js.map +1 -1
  108. package/dist/lib/agent/tasks.js +23 -21
  109. package/dist/lib/agent/tasks.js.map +1 -1
  110. package/dist/lib/agent/utils.js +17 -17
  111. package/dist/lib/agent/utils.js.map +1 -1
  112. package/dist/lib/ai-model/connectivity.js +7 -3
  113. package/dist/lib/ai-model/connectivity.js.map +1 -1
  114. package/dist/lib/ai-model/errors.js +46 -0
  115. package/dist/lib/ai-model/errors.js.map +1 -0
  116. package/dist/lib/ai-model/index.js +7 -14
  117. package/dist/lib/ai-model/inspect.js +141 -144
  118. package/dist/lib/ai-model/inspect.js.map +1 -1
  119. package/dist/lib/ai-model/llm-planning.js +44 -26
  120. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  121. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/actions.js +22 -44
  122. package/dist/lib/ai-model/models/auto-glm/actions.js.map +1 -0
  123. package/dist/lib/ai-model/models/auto-glm/adapter.js +79 -0
  124. package/dist/lib/ai-model/models/auto-glm/adapter.js.map +1 -0
  125. package/dist/lib/ai-model/models/auto-glm/locate.js +146 -0
  126. package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -0
  127. package/dist/lib/ai-model/models/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/planning.js +8 -9
  129. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/prompt.js +14 -16
  131. package/dist/lib/ai-model/models/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/{auto-glm/util.js → models/default.js} +13 -13
  133. package/dist/lib/ai-model/models/default.js.map +1 -0
  134. package/dist/lib/ai-model/models/doubao.js +184 -0
  135. package/dist/lib/ai-model/models/doubao.js.map +1 -0
  136. package/dist/lib/ai-model/models/gemini.js +68 -0
  137. package/dist/lib/ai-model/models/gemini.js.map +1 -0
  138. package/dist/lib/ai-model/models/glm.js +71 -0
  139. package/dist/lib/ai-model/models/glm.js.map +1 -0
  140. package/dist/lib/ai-model/models/gpt.js +65 -0
  141. package/dist/lib/ai-model/models/gpt.js.map +1 -0
  142. package/dist/lib/ai-model/{service-caller/image-detail.js → models/index.js} +8 -7
  143. package/dist/lib/ai-model/models/index.js.map +1 -0
  144. package/dist/lib/ai-model/models/qwen.js +147 -0
  145. package/dist/lib/ai-model/models/qwen.js.map +1 -0
  146. package/dist/lib/ai-model/models/registry.js +85 -0
  147. package/dist/lib/ai-model/models/registry.js.map +1 -0
  148. package/dist/lib/ai-model/models/resolved.js +138 -0
  149. package/dist/lib/ai-model/models/resolved.js.map +1 -0
  150. package/dist/lib/ai-model/models/types.js +20 -0
  151. package/dist/lib/ai-model/models/types.js.map +1 -0
  152. package/dist/lib/ai-model/models/ui-tars/adapter.js +176 -0
  153. package/dist/lib/ai-model/models/ui-tars/adapter.js.map +1 -0
  154. package/dist/lib/ai-model/{ui-tars-planning.js → models/ui-tars/planning.js} +44 -62
  155. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/extraction.js +3 -3
  157. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  158. package/dist/lib/ai-model/prompt/llm-locator.js +11 -11
  159. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  160. package/dist/lib/ai-model/prompt/llm-planning.js +25 -60
  161. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  162. package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -10
  163. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  164. package/dist/lib/ai-model/prompt/locate-grounding-rules.js +43 -0
  165. package/dist/lib/ai-model/prompt/locate-grounding-rules.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/locate-param-example.js +52 -0
  167. package/dist/lib/ai-model/prompt/locate-param-example.js.map +1 -0
  168. package/dist/lib/ai-model/prompt/playwright-generator.js +5 -5
  169. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  170. package/dist/lib/ai-model/prompt/yaml-generator.js +5 -5
  171. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  172. package/dist/lib/ai-model/prompts/locate-result-coordinates.js +150 -0
  173. package/dist/lib/ai-model/prompts/locate-result-coordinates.js.map +1 -0
  174. package/dist/lib/ai-model/service-caller/index.js +68 -199
  175. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  176. package/dist/lib/ai-model/service-caller/json.js +100 -0
  177. package/dist/lib/ai-model/service-caller/json.js.map +1 -0
  178. package/dist/lib/ai-model/shared/model-locate-result/bbox.js +117 -0
  179. package/dist/lib/ai-model/shared/model-locate-result/bbox.js.map +1 -0
  180. package/dist/lib/ai-model/shared/model-locate-result/factory.js +130 -0
  181. package/dist/lib/ai-model/shared/model-locate-result/factory.js.map +1 -0
  182. package/dist/lib/ai-model/{prompt/common.js → shared/model-locate-result/index.js} +9 -9
  183. package/dist/lib/ai-model/shared/model-locate-result/index.js.map +1 -0
  184. package/dist/lib/ai-model/shared/model-locate-result/parse.js +78 -0
  185. package/dist/lib/ai-model/shared/model-locate-result/parse.js.map +1 -0
  186. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js +98 -0
  187. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js.map +1 -0
  188. package/dist/lib/ai-model/shared/model-locate-result/types.js +20 -0
  189. package/dist/lib/ai-model/shared/model-locate-result/types.js.map +1 -0
  190. package/dist/lib/ai-model/types.js +20 -0
  191. package/dist/lib/ai-model/types.js.map +1 -0
  192. package/dist/lib/ai-model/workflows/image-preprocess.js +61 -0
  193. package/dist/lib/ai-model/workflows/image-preprocess.js.map +1 -0
  194. package/dist/lib/ai-model/workflows/inspect/index.js +50 -0
  195. package/dist/lib/ai-model/workflows/inspect/index.js.map +1 -0
  196. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js +60 -0
  197. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js.map +1 -0
  198. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js +52 -0
  199. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js.map +1 -0
  200. package/dist/lib/ai-model/workflows/inspect/types.js +20 -0
  201. package/dist/lib/ai-model/workflows/inspect/types.js.map +1 -0
  202. package/dist/lib/ai-model/{model-family.js → workflows/planning/index.js} +6 -7
  203. package/dist/lib/ai-model/workflows/planning/index.js.map +1 -0
  204. package/dist/lib/ai-model/workflows/planning/types.js +20 -0
  205. package/dist/lib/ai-model/workflows/planning/types.js.map +1 -0
  206. package/dist/lib/common.js +4 -206
  207. package/dist/lib/common.js.map +1 -1
  208. package/dist/lib/device/index.js.map +1 -1
  209. package/dist/lib/service/index.js +96 -69
  210. package/dist/lib/service/index.js.map +1 -1
  211. package/dist/lib/types.js.map +1 -1
  212. package/dist/lib/utils.js +2 -2
  213. package/dist/lib/yaml/player.js +4 -3
  214. package/dist/lib/yaml/player.js.map +1 -1
  215. package/dist/types/agent/agent.d.ts +14 -6
  216. package/dist/types/agent/task-builder.d.ts +2 -2
  217. package/dist/types/agent/tasks.d.ts +6 -6
  218. package/dist/types/agent/utils.d.ts +8 -5
  219. package/dist/types/ai-model/errors.d.ts +2 -0
  220. package/dist/types/ai-model/index.d.ts +2 -4
  221. package/dist/types/ai-model/inspect.d.ts +13 -33
  222. package/dist/types/ai-model/llm-planning.d.ts +6 -17
  223. package/dist/types/ai-model/{auto-glm → models/auto-glm}/actions.d.ts +2 -2
  224. package/dist/types/ai-model/models/auto-glm/adapter.d.ts +5 -0
  225. package/dist/types/ai-model/models/auto-glm/locate.d.ts +3 -0
  226. package/dist/types/ai-model/models/auto-glm/planning.d.ts +3 -0
  227. package/dist/types/ai-model/models/auto-glm/prompt.d.ts +4 -0
  228. package/dist/types/ai-model/models/default.d.ts +2 -0
  229. package/dist/types/ai-model/models/doubao.d.ts +10 -0
  230. package/dist/types/ai-model/models/gemini.d.ts +18 -0
  231. package/dist/types/ai-model/models/glm.d.ts +18 -0
  232. package/dist/types/ai-model/models/gpt.d.ts +18 -0
  233. package/dist/types/ai-model/models/index.d.ts +2 -0
  234. package/dist/types/ai-model/models/qwen.d.ts +30 -0
  235. package/dist/types/ai-model/models/registry.d.ts +81 -0
  236. package/dist/types/ai-model/models/resolved.d.ts +9 -0
  237. package/dist/types/ai-model/models/types.d.ts +102 -0
  238. package/dist/types/ai-model/models/ui-tars/adapter.d.ts +6 -0
  239. package/dist/types/ai-model/{ui-tars-planning.d.ts → models/ui-tars/planning.d.ts} +7 -11
  240. package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
  241. package/dist/types/ai-model/prompt/llm-planning.d.ts +5 -5
  242. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
  243. package/dist/types/ai-model/prompt/locate-grounding-rules.d.ts +1 -0
  244. package/dist/types/ai-model/prompt/locate-param-example.d.ts +3 -0
  245. package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -3
  246. package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -3
  247. package/dist/types/ai-model/prompts/locate-result-coordinates.d.ts +6 -0
  248. package/dist/types/ai-model/service-caller/index.d.ts +19 -27
  249. package/dist/types/ai-model/service-caller/json.d.ts +9 -0
  250. package/dist/types/ai-model/shared/model-locate-result/bbox.d.ts +7 -0
  251. package/dist/types/ai-model/shared/model-locate-result/factory.d.ts +2 -0
  252. package/dist/types/ai-model/shared/model-locate-result/index.d.ts +3 -0
  253. package/dist/types/ai-model/shared/model-locate-result/parse.d.ts +5 -0
  254. package/dist/types/ai-model/shared/model-locate-result/pixel-bbox-mapper.d.ts +7 -0
  255. package/dist/types/ai-model/shared/model-locate-result/types.d.ts +157 -0
  256. package/dist/types/ai-model/types.d.ts +2 -0
  257. package/dist/types/ai-model/workflows/image-preprocess.d.ts +30 -0
  258. package/dist/types/ai-model/workflows/inspect/index.d.ts +1 -0
  259. package/dist/types/ai-model/workflows/inspect/locate-result-rect.d.ts +4 -0
  260. package/dist/types/ai-model/workflows/inspect/search-area-mapping.d.ts +3 -0
  261. package/dist/types/ai-model/workflows/inspect/types.d.ts +37 -0
  262. package/dist/types/ai-model/workflows/planning/index.d.ts +2 -0
  263. package/dist/types/ai-model/workflows/planning/types.d.ts +15 -0
  264. package/dist/types/common.d.ts +0 -30
  265. package/dist/types/device/index.d.ts +22 -22
  266. package/dist/types/service/index.d.ts +5 -4
  267. package/dist/types/types.d.ts +21 -9
  268. package/dist/types/yaml.d.ts +8 -2
  269. package/package.json +2 -2
  270. package/dist/es/ai-model/auto-glm/actions.mjs.map +0 -1
  271. package/dist/es/ai-model/auto-glm/index.mjs +0 -6
  272. package/dist/es/ai-model/auto-glm/parser.mjs.map +0 -1
  273. package/dist/es/ai-model/auto-glm/planning.mjs.map +0 -1
  274. package/dist/es/ai-model/auto-glm/prompt.mjs.map +0 -1
  275. package/dist/es/ai-model/auto-glm/util.mjs +0 -9
  276. package/dist/es/ai-model/auto-glm/util.mjs.map +0 -1
  277. package/dist/es/ai-model/model-family.mjs +0 -6
  278. package/dist/es/ai-model/model-family.mjs.map +0 -1
  279. package/dist/es/ai-model/prompt/common.mjs +0 -8
  280. package/dist/es/ai-model/prompt/common.mjs.map +0 -1
  281. package/dist/es/ai-model/service-caller/image-detail.mjs +0 -6
  282. package/dist/es/ai-model/service-caller/image-detail.mjs.map +0 -1
  283. package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
  284. package/dist/lib/ai-model/auto-glm/actions.js.map +0 -1
  285. package/dist/lib/ai-model/auto-glm/index.js +0 -66
  286. package/dist/lib/ai-model/auto-glm/index.js.map +0 -1
  287. package/dist/lib/ai-model/auto-glm/parser.js.map +0 -1
  288. package/dist/lib/ai-model/auto-glm/planning.js.map +0 -1
  289. package/dist/lib/ai-model/auto-glm/prompt.js.map +0 -1
  290. package/dist/lib/ai-model/auto-glm/util.js.map +0 -1
  291. package/dist/lib/ai-model/model-family.js.map +0 -1
  292. package/dist/lib/ai-model/prompt/common.js.map +0 -1
  293. package/dist/lib/ai-model/service-caller/image-detail.js.map +0 -1
  294. package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
  295. package/dist/types/ai-model/auto-glm/index.d.ts +0 -6
  296. package/dist/types/ai-model/auto-glm/planning.d.ts +0 -12
  297. package/dist/types/ai-model/auto-glm/prompt.d.ts +0 -27
  298. package/dist/types/ai-model/auto-glm/util.d.ts +0 -13
  299. package/dist/types/ai-model/model-family.d.ts +0 -7
  300. package/dist/types/ai-model/prompt/common.d.ts +0 -2
  301. package/dist/types/ai-model/service-caller/image-detail.d.ts +0 -2
  302. /package/dist/es/ai-model/{auto-glm → models/auto-glm}/parser.mjs +0 -0
  303. /package/dist/lib/ai-model/{auto-glm → models/auto-glm}/parser.js +0 -0
  304. /package/dist/types/ai-model/{auto-glm → models/auto-glm}/parser.d.ts +0 -0
@@ -27,19 +27,20 @@ __webpack_require__.d(__webpack_exports__, {
27
27
  parseXMLPlanningResponse: ()=>parseXMLPlanningResponse,
28
28
  plan: ()=>plan
29
29
  });
30
- const img_namespaceObject = require("@midscene/shared/img");
31
30
  const logger_namespaceObject = require("@midscene/shared/logger");
32
31
  const utils_namespaceObject = require("@midscene/shared/utils");
33
32
  const external_common_js_namespaceObject = require("../common.js");
33
+ const external_errors_js_namespaceObject = require("./errors.js");
34
34
  const llm_planning_js_namespaceObject = require("./prompt/llm-planning.js");
35
35
  const util_js_namespaceObject = require("./prompt/util.js");
36
36
  const index_js_namespaceObject = require("./service-caller/index.js");
37
+ const image_preprocess_js_namespaceObject = require("./workflows/image-preprocess.js");
37
38
  const debug = (0, logger_namespaceObject.getDebug)('planning');
38
39
  const warnLog = (0, logger_namespaceObject.getDebug)('planning', {
39
40
  console: true
40
41
  });
41
42
  const noPreviousActionsText = 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';
42
- function parseXMLPlanningResponse(xmlString, modelFamily) {
43
+ function parseXMLPlanningResponse(xmlString, jsonParser) {
43
44
  const thought = (0, util_js_namespaceObject.extractXMLTag)(xmlString, 'thought');
44
45
  const memory = (0, util_js_namespaceObject.extractXMLTag)(xmlString, 'memory');
45
46
  const log = (0, util_js_namespaceObject.extractXMLTag)(xmlString, 'log') || '';
@@ -63,11 +64,12 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
63
64
  const type = actionType.split('<')[0].trim();
64
65
  let param;
65
66
  if (actionParamStr) try {
66
- param = (0, index_js_namespaceObject.parseModelResponseJson)(actionParamStr, modelFamily, 'input' === type.toLowerCase() ? {
67
- preserveStringValueKeys: [
67
+ param = jsonParser(actionParamStr, {
68
+ source: 'planning-action-param',
69
+ preserveStringValueKeys: 'input' === type.toLowerCase() ? [
68
70
  'value'
69
- ]
70
- } : void 0);
71
+ ] : void 0
72
+ });
71
73
  } catch (e) {
72
74
  throw new Error(`Failed to parse action-param-json: ${e}`);
73
75
  }
@@ -105,27 +107,28 @@ function parseXMLPlanningResponse(xmlString, modelFamily) {
105
107
  };
106
108
  }
107
109
  async function plan(userInstruction, opts) {
108
- const { context, modelConfig, conversationHistory } = opts;
110
+ const { context, conversationHistory } = opts;
111
+ const modelRuntime = opts.modelRuntime;
112
+ const { adapter } = modelRuntime;
109
113
  const { shotSize } = context;
110
114
  const screenshotBase64 = context.screenshot.base64;
111
- const { modelFamily } = modelConfig;
115
+ if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) throw new Error((0, external_errors_js_namespaceObject.planningModelFamilyRequiredForLocateMessage)(modelRuntime.config.slot));
116
+ const locateResultAdapter = modelRuntime.config.modelFamily && 'standard' === adapter.locate.kind ? adapter.locate.resultAdapter : void 0;
112
117
  const includeSubGoals = true === opts.deepThink;
113
118
  const systemPrompt = await (0, llm_planning_js_namespaceObject.systemPromptToTaskPlanning)({
114
119
  actionSpace: opts.actionSpace,
115
- modelFamily,
116
- includeBbox: opts.includeBbox,
120
+ locatePromptSpec: locateResultAdapter?.promptSpec,
121
+ includeLocateInPlanning: opts.includeLocateInPlanning,
117
122
  includeThought: true,
118
123
  includeSubGoals
119
124
  });
120
- let imagePayload = screenshotBase64;
121
- let imageWidth = shotSize.width;
122
- let imageHeight = shotSize.height;
123
- if ('qwen2.5-vl' === modelFamily) {
124
- const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
125
- imageWidth = paddedResult.width;
126
- imageHeight = paddedResult.height;
127
- imagePayload = paddedResult.imageBase64;
128
- }
125
+ const preparedImage = await (0, image_preprocess_js_namespaceObject.prepareModelImage)({
126
+ imageBase64: screenshotBase64,
127
+ width: shotSize.width,
128
+ height: shotSize.height,
129
+ policy: adapter.imagePreprocess
130
+ });
131
+ const imagePayload = preparedImage.imageBase64;
129
132
  const actionContext = opts.actionContext ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\n` : '';
130
133
  const instruction = [
131
134
  {
@@ -188,23 +191,23 @@ async function plan(userInstruction, opts) {
188
191
  ...instruction,
189
192
  ...historyLog
190
193
  ];
191
- let { content: rawResponse, usage, reasoning_content } = await (0, index_js_namespaceObject.callAI)(msgs, modelConfig, {
194
+ let { content: rawResponse, usage, reasoning_content } = await (0, index_js_namespaceObject.callAI)(msgs, modelRuntime, {
192
195
  abortSignal: opts.abortSignal,
193
- forceOriginalImageDetail: 'gpt-5' === modelFamily && opts.includeBbox
196
+ requiresOriginalImageDetail: opts.includeLocateInPlanning
194
197
  });
195
198
  let planFromAI;
196
199
  try {
197
200
  try {
198
- planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
201
+ planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);
199
202
  } catch {
200
- const retry = await (0, index_js_namespaceObject.callAI)(msgs, modelConfig, {
203
+ const retry = await (0, index_js_namespaceObject.callAI)(msgs, modelRuntime, {
201
204
  abortSignal: opts.abortSignal,
202
- forceOriginalImageDetail: 'gpt-5' === modelFamily && opts.includeBbox
205
+ requiresOriginalImageDetail: opts.includeLocateInPlanning
203
206
  });
204
207
  rawResponse = retry.content;
205
208
  usage = retry.usage;
206
209
  reasoning_content = retry.reasoning_content;
207
- planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
210
+ planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);
208
211
  }
209
212
  if (planFromAI.action && void 0 !== planFromAI.finalizeSuccess) {
210
213
  warnLog('Planning response included both an action and <complete>; ignoring <complete> output.');
@@ -238,7 +241,22 @@ async function plan(userInstruction, opts) {
238
241
  debug('locateFields', locateFields);
239
242
  locateFields.forEach((field)=>{
240
243
  const locateResult = action.param[field];
241
- if (locateResult && void 0 !== modelFamily) action.param[field] = (0, external_common_js_namespaceObject.fillBboxParam)(locateResult, imageWidth, imageHeight, modelFamily);
244
+ if (locateResult) {
245
+ if (!opts.includeLocateInPlanning) {
246
+ if ('object' == typeof locateResult) action.param[field] = {
247
+ prompt: locateResult.prompt
248
+ };
249
+ return;
250
+ }
251
+ (0, utils_namespaceObject.assert)(locateResultAdapter, 'generic planning locate normalization requires a standard locate adapter');
252
+ action.param[field] = {
253
+ ...locateResult,
254
+ locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(locateResult, {
255
+ preparedSize: preparedImage.preparedSize,
256
+ contentSize: preparedImage.contentSize
257
+ })
258
+ };
259
+ }
242
260
  });
243
261
  });
244
262
  if (includeSubGoals) {
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n parseModelResponseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = parseModelResponseJson(\n actionParamStr,\n modelFamily,\n type.toLowerCase() === 'input'\n ? { preserveStringValueKeys: ['value'] }\n : undefined,\n );\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n // Controls aiAct planning prompt shape and state updates, such as sub-goals.\n deepThink?: boolean;\n abortSignal?: AbortSignal;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = shotSize.width;\n let imageHeight = shotSize.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n // When GPT-5 planning includes bbox, the planning call also performs\n // localization, so the screenshot should be sent with original detail.\n forceOriginalImageDetail: modelFamily === 'gpt-5' && opts.includeBbox,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n } catch {\n const retry = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n forceOriginalImageDetail: modelFamily === 'gpt-5' && opts.includeBbox,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","parseModelResponseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","shotSize","screenshotBase64","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;ACwBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQC,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EACNf,gBACAP,aACAoB,AAAuB,YAAvBA,KAAK,WAAW,KACZ;gBAAE,yBAAyB;oBAAC;iBAAQ;YAAC,IACrCR;QAER,EAAE,OAAOW,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAYC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,QAAQ,EAAE,GAAGH;IACrB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAE5B,WAAW,EAAE,GAAG6B;IAGxB,MAAMI,kBAAkBN,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMO,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7B3B;QACA,aAAa2B,KAAK,WAAW;QAC7B,gBAAgB;QAChBM;IACF;IAEA,IAAIG,eAAeJ;IACnB,IAAIK,aAAaN,SAAS,KAAK;IAC/B,IAAIO,cAAcP,SAAS,MAAM;IAKjC,IAAI/B,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMuC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBd,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMe,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEf,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIiB;IAKJ,MAAMC,wBAAwBX,kBAC1BH,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMe,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9Bd,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAEjC,uBAAuB;IAGpC,MAAMiD,eAAehB,oBAAoB,cAAc;IACvD,MAAMiB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIhB,oBAAoB,sBAAsB,EAAE;QAC9Ca,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGb,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEiB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEa,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACa;IAG3Bb,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMkB,aAAalB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMsB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,aAAa;QAClC,aAAaF,KAAK,WAAW;QAG7B,0BAA0B3B,AAAgB,YAAhBA,eAA2B2B,KAAK,WAAW;IACvE;IAGA,IAAI2B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAalD;QACrD,EAAE,OAAM;YACN,MAAMuD,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,aAAa;gBAC5C,aAAaF,KAAK,WAAW;gBAE7B,0BAA0B3B,AAAgB,YAAhBA,eAA2B2B,KAAK,WAAW;YACvE;YACAuB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAalD;QACrD;QAEA,IAAIsD,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAIxB,iBACFH,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM4B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAAS7B,KAAK,WAAW;YAC1D8B;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBlC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,gBAAgBjE,AAAgBY,WAAhBZ,aAElBmB,OAAO,KAAK,CAAC6C,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA5B,YACAC,aACAtC;YAGN;QACF;QAGA,IAAIiC,iBAAiB;YACnB,IAAIqB,WAAW,cAAc,EAAE,QAC7BxB,oBAAoB,aAAa,CAACwB,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMa,SAASb,WAAW,mBAAmB,CAChDxB,oBAAoB,mBAAmB,CAACqC;YAI5C,IAAIb,WAAW,GAAG,EAChBxB,oBAAoB,gBAAgB,CAACwB,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChBxB,oBAAoB,mBAAmB,CAACwB,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnBxB,oBAAoB,YAAY,CAACwB,WAAW,MAAM;QAGpDxB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMoB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOU,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClCnB,aACAC;IAEJ;AACF"}
1
+ {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { buildYamlFlowFromPlans, findAllMidsceneLocatorField } from '../common';\nimport { planningModelFamilyRequiredForLocateMessage } from './errors';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport { AIResponseParseError, callAI } from './service-caller/index';\nimport type { JsonParser, JsonParserSource } from './service-caller/json';\nimport { prepareModelImage } from './workflows/image-preprocess';\nimport type { PlanOptions } from './workflows/planning/types';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\nconst noPreviousActionsText =\n 'No previous actions have been executed in this aiAct execution yet. If the instruction asks for actions, choose the first action to execute.';\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse.\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n jsonParser: JsonParser,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = jsonParser(actionParamStr, {\n source: 'planning-action-param',\n preserveStringValueKeys:\n type.toLowerCase() === 'input' ? ['value'] : undefined,\n });\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: PlanOptions,\n): Promise<PlanningAIResponse> {\n const { context, conversationHistory } = opts;\n const modelRuntime = opts.modelRuntime;\n const { adapter } = modelRuntime;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n if (opts.includeLocateInPlanning && !modelRuntime.config.modelFamily) {\n throw new Error(\n planningModelFamilyRequiredForLocateMessage(modelRuntime.config.slot),\n );\n }\n\n const locateResultAdapter =\n modelRuntime.config.modelFamily && adapter.locate.kind === 'standard'\n ? adapter.locate.resultAdapter\n : undefined;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n locatePromptSpec: locateResultAdapter?.promptSpec,\n includeLocateInPlanning: opts.includeLocateInPlanning,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n const preparedImage = await prepareModelImage({\n imageBase64: screenshotBase64,\n width: shotSize.width,\n height: shotSize.height,\n policy: adapter.imagePreprocess,\n });\n const imagePayload = preparedImage.imageBase64;\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const executionProgressText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const executionProgressSection = executionProgressText\n ? `\\n\\n${executionProgressText}`\n : conversationHistory.pendingFeedbackMessage\n ? ''\n : `\\n\\n${noPreviousActionsText}`;\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `This is the current screenshot.${memoriesSection}${executionProgressSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Planning with locate results is localization-sensitive. Adapters decide\n // whether this should request original image detail.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n } catch {\n const retry = await callAI(msgs, modelRuntime, {\n abortSignal: opts.abortSignal,\n // Keep retry requests consistent with the initial planning call.\n requiresOriginalImageDetail: opts.includeLocateInPlanning,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, adapter.jsonParser);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (!opts.includeLocateInPlanning) {\n if (typeof locateResult === 'object') {\n // In prompt-only planning mode, ignore any accidental coordinates from the model.\n action.param[field] = { prompt: locateResult.prompt };\n }\n return;\n }\n\n assert(\n locateResultAdapter,\n 'generic planning locate normalization requires a standard locate adapter',\n );\n action.param[field] = {\n ...locateResult,\n locatedPixelBbox: locateResultAdapter.adaptPlanningParamToPixelBbox(\n locateResult,\n {\n preparedSize: preparedImage.preparedSize,\n contentSize: preparedImage.contentSize,\n },\n ),\n };\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","warnLog","noPreviousActionsText","parseXMLPlanningResponse","xmlString","jsonParser","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","e","Error","plan","userInstruction","opts","context","conversationHistory","modelRuntime","adapter","shotSize","screenshotBase64","planningModelFamilyRequiredForLocateMessage","locateResultAdapter","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","preparedImage","prepareModelImage","imagePayload","actionContext","instruction","latestFeedbackMessage","executionProgressText","executionProgressSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;ACcA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,UAAUD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,YAAY;IAAE,SAAS;AAAK;AAErD,MAAME,wBACJ;AAKK,SAASC,yBACdC,SAAiB,EACjBC,UAAsB;IAEtB,MAAMC,UAAUC,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACzC,MAAMI,SAASD,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACxC,MAAMK,MAAMF,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACvC,MAAMO,aAAaJ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,AAAAA,IAAAA,wBAAAA,aAAAA,AAAAA,EAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,AAAAA,IAAAA,wBAAAA,oBAAAA,AAAAA,EAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,AAAAA,IAAAA,wBAAAA,wBAAAA,AAAAA,EAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQrB,WAAWO,gBAAgB;gBACjC,QAAQ;gBACR,yBACEa,AAAuB,YAAvBA,KAAK,WAAW,KAAiB;oBAAC;iBAAQ,GAAGR;YACjD;QACF,EAAE,OAAOU,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFH,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeO,KACpBC,eAAuB,EACvBC,IAAiB;IAEjB,MAAM,EAAEC,OAAO,EAAEC,mBAAmB,EAAE,GAAGF;IACzC,MAAMG,eAAeH,KAAK,YAAY;IACtC,MAAM,EAAEI,OAAO,EAAE,GAAGD;IACpB,MAAM,EAAEE,QAAQ,EAAE,GAAGJ;IACrB,MAAMK,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,IAAID,KAAK,uBAAuB,IAAI,CAACG,aAAa,MAAM,CAAC,WAAW,EAClE,MAAM,IAAIN,MACRU,AAAAA,IAAAA,mCAAAA,2CAAAA,AAAAA,EAA4CJ,aAAa,MAAM,CAAC,IAAI;IAIxE,MAAMK,sBACJL,aAAa,MAAM,CAAC,WAAW,IAAIC,AAAwB,eAAxBA,QAAQ,MAAM,CAAC,IAAI,GAClDA,QAAQ,MAAM,CAAC,aAAa,GAC5BlB;IAGN,MAAMuB,kBAAkBT,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMU,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,kBAAkBQ,qBAAqB;QACvC,yBAAyBR,KAAK,uBAAuB;QACrD,gBAAgB;QAChBS;IACF;IAEA,MAAMG,gBAAgB,MAAMC,AAAAA,IAAAA,oCAAAA,iBAAAA,AAAAA,EAAkB;QAC5C,aAAaP;QACb,OAAOD,SAAS,KAAK;QACrB,QAAQA,SAAS,MAAM;QACvB,QAAQD,QAAQ,eAAe;IACjC;IACA,MAAMU,eAAeF,cAAc,WAAW;IAE9C,MAAMG,gBAAgBf,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMgB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEhB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIkB;IAKJ,MAAMC,wBAAwBT,kBAC1BP,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMiB,2BAA2BD,wBAC7B,CAAC,IAAI,EAAEA,uBAAuB,GAC9BhB,oBAAoB,sBAAsB,GACxC,KACA,CAAC,IAAI,EAAE/B,uBAAuB;IAGpC,MAAMiD,eAAelB,oBAAoB,cAAc;IACvD,MAAMmB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIlB,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEmB,kBAAkBF,0BAA0B;gBACzN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAZ,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,+BAA+B,EAAEI,kBAAkBF,0BAA0B;YACtF;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKL;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFZ,oBAAoB,MAAM,CAACe;IAG3Bf,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMoB,aAAapB,oBAAoB,QAAQ,CAACF,KAAK,kBAAkB;IAEvE,MAAMuB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCM;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,cAAc;QACnC,aAAaH,KAAK,WAAW;QAG7B,6BAA6BA,KAAK,uBAAuB;IAC3D;IAGA,IAAI4B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE,EAAE,OAAM;YACN,MAAMyB,QAAQ,MAAMF,AAAAA,IAAAA,yBAAAA,MAAAA,AAAAA,EAAOJ,MAAMpB,cAAc;gBAC7C,aAAaH,KAAK,WAAW;gBAE7B,6BAA6BA,KAAK,uBAAuB;YAC3D;YACAwB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAapB,QAAQ,UAAU;QACvE;QAEA,IAAIwB,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjE1D,QACE;YAEF0D,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C5D,MAAM;YACN+D,yBAAyB;YAEzB,IAAItB,iBACFP,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM8B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBH,SAAS9B,KAAK,WAAW;YAC1D+B;QACF;QAEAG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;YAG9B1B,MAAM,+BAA+BmE;YACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENnE,MAAM,gBAAgBoE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,cAAc;oBAChB,IAAI,CAACvC,KAAK,uBAAuB,EAAE;wBACjC,IAAI,AAAwB,YAAxB,OAAOuC,cAET9C,OAAO,KAAK,CAAC6C,MAAM,GAAG;4BAAE,QAAQC,aAAa,MAAM;wBAAC;wBAEtD;oBACF;oBAEAL,IAAAA,sBAAAA,MAAAA,AAAAA,EACE1B,qBACA;oBAEFf,OAAO,KAAK,CAAC6C,MAAM,GAAG;wBACpB,GAAGC,YAAY;wBACf,kBAAkB/B,oBAAoB,6BAA6B,CACjE+B,cACA;4BACE,cAAc3B,cAAc,YAAY;4BACxC,aAAaA,cAAc,WAAW;wBACxC;oBAEJ;gBACF;YACF;QACF;QAGA,IAAIH,iBAAiB;YACnB,IAAImB,WAAW,cAAc,EAAE,QAC7B1B,oBAAoB,aAAa,CAAC0B,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMY,SAASZ,WAAW,mBAAmB,CAChD1B,oBAAoB,mBAAmB,CAACsC;YAI5C,IAAIZ,WAAW,GAAG,EAChB1B,oBAAoB,gBAAgB,CAAC0B,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChB1B,oBAAoB,mBAAmB,CAAC0B,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnB1B,oBAAoB,YAAY,CAAC0B,WAAW,MAAM;QAGpD1B,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMsB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOS,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,yBAAAA,oBAAoBA,CAC5B,CAAC,iBAAiB,EAAEF,cAAc,EAClClB,aACAC;IAEJ;AACF"}
@@ -26,13 +26,27 @@ __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
27
  transformAutoGLMAction: ()=>transformAutoGLMAction
28
28
  });
29
- const external_common_js_namespaceObject = require("../../common.js");
30
29
  const logger_namespaceObject = require("@midscene/shared/logger");
30
+ const bbox_js_namespaceObject = require("../../shared/model-locate-result/bbox.js");
31
+ const pixel_bbox_mapper_js_namespaceObject = require("../../shared/model-locate-result/pixel-bbox-mapper.js");
31
32
  const debug = (0, logger_namespaceObject.getDebug)('auto-glm-actions');
32
33
  const AUTO_GLM_COORDINATE_MAX = 1000;
33
- function autoGLMCoordinateToBbox(x, y, width, height) {
34
- const bbox = (0, external_common_js_namespaceObject.pointToBbox)(x, y, 10);
35
- return (0, external_common_js_namespaceObject.adaptBbox)(bbox, width, height, 'auto-glm');
34
+ function autoGLMCoordinateToLocateParam(coordinate, size) {
35
+ const ctx = {
36
+ preparedSize: size
37
+ };
38
+ const pixelBbox = (0, pixel_bbox_mapper_js_namespaceObject.mapLocateResultToPixelBboxByCoordinates)({
39
+ type: 'point',
40
+ coordinates: coordinate
41
+ }, ctx, {
42
+ shape: 'point',
43
+ order: 'xy',
44
+ normalizedBy: AUTO_GLM_COORDINATE_MAX
45
+ });
46
+ return {
47
+ prompt: '',
48
+ locatedPixelBbox: (0, bbox_js_namespaceObject.finalizePixelBbox)(pixelBbox, coordinate, ctx)
49
+ };
36
50
  }
37
51
  const BACK_BUTTON_NAMES = [
38
52
  'AndroidBackButton',
@@ -70,16 +84,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
70
84
  {
71
85
  const tapAction = doAction;
72
86
  debug('Transform Tap action:', tapAction);
73
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(tapAction.element[0], tapAction.element[1], size.width, size.height);
74
- const locate = {
75
- prompt: '',
76
- bbox: [
77
- x1,
78
- y1,
79
- x2,
80
- y2
81
- ]
82
- };
87
+ const locate = autoGLMCoordinateToLocateParam(tapAction.element, size);
83
88
  return [
84
89
  {
85
90
  type: 'Tap',
@@ -93,16 +98,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
93
98
  {
94
99
  const doubleTapAction = doAction;
95
100
  debug('Transform Double Tap action:', doubleTapAction);
96
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(doubleTapAction.element[0], doubleTapAction.element[1], size.width, size.height);
97
- const locate = {
98
- prompt: '',
99
- bbox: [
100
- x1,
101
- y1,
102
- x2,
103
- y2
104
- ]
105
- };
101
+ const locate = autoGLMCoordinateToLocateParam(doubleTapAction.element, size);
106
102
  return [
107
103
  {
108
104
  type: 'DoubleClick',
@@ -129,16 +125,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
129
125
  {
130
126
  const swipeAction = doAction;
131
127
  debug('Transform Swipe action:', swipeAction);
132
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(swipeAction.start[0], swipeAction.start[1], size.width, size.height);
133
- const locate = {
134
- prompt: '',
135
- bbox: [
136
- x1,
137
- y1,
138
- x2,
139
- y2
140
- ]
141
- };
128
+ const locate = autoGLMCoordinateToLocateParam(swipeAction.start, size);
142
129
  const deltaX = swipeAction.end[0] - swipeAction.start[0];
143
130
  const deltaY = swipeAction.end[1] - swipeAction.start[1];
144
131
  let direction;
@@ -169,16 +156,7 @@ function transformAutoGLMAction(action, size, actionSpace) {
169
156
  {
170
157
  const longPressAction = doAction;
171
158
  debug('Transform Long Press action:', longPressAction);
172
- const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(longPressAction.element[0], longPressAction.element[1], size.width, size.height);
173
- const locate = {
174
- prompt: '',
175
- bbox: [
176
- x1,
177
- y1,
178
- x2,
179
- y2
180
- ]
181
- };
159
+ const locate = autoGLMCoordinateToLocateParam(longPressAction.element, size);
182
160
  return [
183
161
  {
184
162
  type: 'LongPress',
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/auto-glm/actions.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/actions.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/device';\nimport type {\n PlanningAction,\n PlanningLocateParamWithLocatedPixelBbox,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { finalizePixelBbox } from '../../shared/model-locate-result/bbox';\nimport { mapLocateResultToPixelBboxByCoordinates } from '../../shared/model-locate-result/pixel-bbox-mapper';\n\nconst debug = getDebug('auto-glm-actions');\n\n/**\n * Auto-GLM coordinate system range: [0, AUTO_GLM_COORDINATE_MAX]\n */\nconst AUTO_GLM_COORDINATE_MAX = 1000;\n\nfunction autoGLMCoordinateToLocateParam(\n coordinate: [number, number],\n size: { width: number; height: number },\n): PlanningLocateParamWithLocatedPixelBbox {\n const ctx = { preparedSize: size };\n const pixelBbox = mapLocateResultToPixelBboxByCoordinates(\n { type: 'point', coordinates: coordinate },\n ctx,\n { shape: 'point', order: 'xy', normalizedBy: AUTO_GLM_COORDINATE_MAX },\n );\n\n return {\n prompt: '',\n locatedPixelBbox: finalizePixelBbox(pixelBbox, coordinate, ctx),\n };\n}\n\nexport interface BaseAction {\n _metadata: string;\n think?: string;\n}\n\nexport interface TapAction extends BaseAction {\n _metadata: 'do';\n action: 'Tap';\n element: [number, number];\n}\n\nexport interface DoubleTapAction extends BaseAction {\n _metadata: 'do';\n action: 'Double Tap';\n element: [number, number];\n}\n\nexport interface TypeAction extends BaseAction {\n _metadata: 'do';\n action: 'Type';\n text: string;\n}\n\nexport interface SwipeAction extends BaseAction {\n _metadata: 'do';\n action: 'Swipe';\n start: [number, number];\n end: [number, number];\n}\n\nexport interface LongPressAction extends BaseAction {\n _metadata: 'do';\n action: 'Long Press';\n element: [number, number];\n}\n\nexport interface LaunchAction extends BaseAction {\n _metadata: 'do';\n action: 'Launch';\n app: string;\n}\n\nexport interface BackAction extends BaseAction {\n _metadata: 'do';\n action: 'Back';\n}\n\nexport interface HomeAction extends BaseAction {\n _metadata: 'do';\n action: 'Home';\n}\n\nexport interface WaitAction extends BaseAction {\n _metadata: 'do';\n action: 'Wait';\n durationMs: number;\n}\n\nexport interface InteractAction extends BaseAction {\n _metadata: 'do';\n action: 'Interact';\n}\n\nexport interface CallAPIAction extends BaseAction {\n _metadata: 'do';\n action: 'Call_API';\n instruction: string;\n}\n\nexport interface TakeoverAction extends BaseAction {\n _metadata: 'do';\n action: 'Take_over';\n message: string;\n}\n\nexport interface NoteAction extends BaseAction {\n _metadata: 'do';\n action: 'Note';\n message: string;\n}\n\nexport interface FinishAction extends BaseAction {\n _metadata: 'finish';\n message: string;\n}\n\nexport type ParsedAction =\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction\n | FinishAction;\n\nconst BACK_BUTTON_NAMES = ['AndroidBackButton', 'HarmonyBackButton'];\nconst HOME_BUTTON_NAMES = ['AndroidHomeButton', 'HarmonyHomeButton'];\n\n/**\n * Find the action name in actionSpace that matches one of the known names.\n * Falls back to defaultName if no match found or actionSpace is not provided.\n */\nfunction findActionName(\n actionSpace: DeviceAction[] | undefined,\n knownNames: string[],\n defaultName: string,\n): string {\n if (!actionSpace) return defaultName;\n const match = actionSpace.find((a) => knownNames.includes(a.name));\n return match ? match.name : defaultName;\n}\n\nexport function transformAutoGLMAction(\n action: ParsedAction,\n size: { width: number; height: number },\n actionSpace?: DeviceAction[],\n): PlanningAction[] {\n try {\n switch (action._metadata) {\n case 'finish': {\n const finishAction = action as FinishAction;\n debug('Transform finish action:', finishAction);\n return [\n {\n type: 'Finished',\n param: {},\n thought: finishAction.message,\n },\n ];\n }\n case 'do': {\n const doAction = action as\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction;\n\n switch ((doAction as any).action) {\n case 'Tap': {\n const tapAction = doAction as TapAction;\n debug('Transform Tap action:', tapAction);\n const locate = autoGLMCoordinateToLocateParam(\n tapAction.element,\n size,\n );\n\n return [\n {\n type: 'Tap',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Double Tap': {\n const doubleTapAction = doAction as DoubleTapAction;\n debug('Transform Double Tap action:', doubleTapAction);\n const locate = autoGLMCoordinateToLocateParam(\n doubleTapAction.element,\n size,\n );\n\n return [\n {\n type: 'DoubleClick',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Type': {\n const typeAction = doAction as TypeAction;\n debug('Transform Type action:', typeAction);\n\n return [\n {\n type: 'Input',\n param: {\n value: typeAction.text,\n },\n },\n ];\n }\n case 'Swipe': {\n const swipeAction = doAction as SwipeAction;\n debug('Transform Swipe action:', swipeAction);\n\n const locate = autoGLMCoordinateToLocateParam(\n swipeAction.start,\n size,\n );\n\n // Calculate horizontal and vertical delta in [0,AUTO_GLM_COORDINATE_MAX] coordinate system\n const deltaX = swipeAction.end[0] - swipeAction.start[0];\n const deltaY = swipeAction.end[1] - swipeAction.start[1];\n\n // Determine direction and distance\n let direction: 'up' | 'down' | 'left' | 'right';\n let distance: number;\n\n const absDeltaX = Math.abs(deltaX);\n const absDeltaY = Math.abs(deltaY);\n\n if (absDeltaY > absDeltaX) {\n // Vertical scroll\n distance = Math.round(\n (absDeltaY * size.height) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaY > 0 ? 'up' : 'down';\n } else {\n // Horizontal scroll\n distance = Math.round(\n (absDeltaX * size.width) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaX > 0 ? 'left' : 'right';\n }\n\n debug(\n `Calculate swipe direction: ${direction}, distance: ${distance}`,\n );\n\n return [\n {\n type: 'Scroll',\n param: {\n locate,\n // The scrolling direction here all refers to which direction of the page's content will appear on the screen.\n distance,\n direction,\n },\n thought: swipeAction.think || '',\n },\n ];\n }\n case 'Long Press': {\n const longPressAction = doAction as LongPressAction;\n debug('Transform Long Press action:', longPressAction);\n const locate = autoGLMCoordinateToLocateParam(\n longPressAction.element,\n size,\n );\n\n return [\n {\n type: 'LongPress',\n param: {\n locate,\n },\n thought: longPressAction.think || '',\n },\n ];\n }\n case 'Back': {\n const backAction = doAction as BackAction;\n debug('Transform Back action:', backAction);\n return [\n {\n type: findActionName(\n actionSpace,\n BACK_BUTTON_NAMES,\n 'AndroidBackButton',\n ),\n param: {},\n thought: backAction.think || '',\n },\n ];\n }\n case 'Home': {\n const homeAction = doAction as HomeAction;\n debug('Transform Home action:', homeAction);\n return [\n {\n type: findActionName(\n actionSpace,\n HOME_BUTTON_NAMES,\n 'AndroidHomeButton',\n ),\n param: {},\n thought: homeAction.think || '',\n },\n ];\n }\n case 'Wait': {\n const waitAction = doAction as WaitAction;\n debug('Transform Wait action:', waitAction);\n return [\n {\n type: 'Sleep',\n param: {\n timeMs: waitAction.durationMs,\n },\n thought: waitAction.think || '',\n },\n ];\n }\n case 'Launch': {\n const launchAction = doAction as LaunchAction;\n debug('Transform Launch action:', launchAction);\n return [\n {\n type: 'Launch',\n param: { uri: launchAction.app },\n thought: launchAction.think || '',\n },\n ];\n }\n case 'Interact': {\n throw new Error(\n `Action \"Interact\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Call_API': {\n throw new Error(\n `Action \"Call_API\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Take_over': {\n throw new Error(\n `Action \"Take_over\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Note': {\n throw new Error(\n `Action \"Note\" from auto-glm is not supported in the current implementation.`,\n );\n }\n default:\n throw new Error(\n `Unknown do() action type: ${(doAction as any).action}`,\n );\n }\n }\n default:\n throw new Error(\n `Unknown action metadata: ${(action as any)._metadata}`,\n );\n }\n } catch (error) {\n const errorMessage = error instanceof Error ? error.message : String(error);\n debug('Transform error:', errorMessage);\n throw new Error(`Failed to transform action: ${errorMessage}`);\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","AUTO_GLM_COORDINATE_MAX","autoGLMCoordinateToLocateParam","coordinate","size","ctx","pixelBbox","mapLocateResultToPixelBboxByCoordinates","finalizePixelBbox","BACK_BUTTON_NAMES","HOME_BUTTON_NAMES","findActionName","actionSpace","knownNames","defaultName","match","a","transformAutoGLMAction","action","finishAction","doAction","tapAction","locate","doubleTapAction","typeAction","swipeAction","deltaX","deltaY","direction","distance","absDeltaX","Math","absDeltaY","longPressAction","backAction","homeAction","waitAction","launchAction","Error","error","errorMessage","String"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACGA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAKvB,MAAMC,0BAA0B;AAEhC,SAASC,+BACPC,UAA4B,EAC5BC,IAAuC;IAEvC,MAAMC,MAAM;QAAE,cAAcD;IAAK;IACjC,MAAME,YAAYC,AAAAA,IAAAA,qCAAAA,uCAAAA,AAAAA,EAChB;QAAE,MAAM;QAAS,aAAaJ;IAAW,GACzCE,KACA;QAAE,OAAO;QAAS,OAAO;QAAM,cAAcJ;IAAwB;IAGvE,OAAO;QACL,QAAQ;QACR,kBAAkBO,AAAAA,IAAAA,wBAAAA,iBAAAA,AAAAA,EAAkBF,WAAWH,YAAYE;IAC7D;AACF;AAwGA,MAAMI,oBAAoB;IAAC;IAAqB;CAAoB;AACpE,MAAMC,oBAAoB;IAAC;IAAqB;CAAoB;AAMpE,SAASC,eACPC,WAAuC,EACvCC,UAAoB,EACpBC,WAAmB;IAEnB,IAAI,CAACF,aAAa,OAAOE;IACzB,MAAMC,QAAQH,YAAY,IAAI,CAAC,CAACI,IAAMH,WAAW,QAAQ,CAACG,EAAE,IAAI;IAChE,OAAOD,QAAQA,MAAM,IAAI,GAAGD;AAC9B;AAEO,SAASG,uBACdC,MAAoB,EACpBd,IAAuC,EACvCQ,WAA4B;IAE5B,IAAI;QACF,OAAQM,OAAO,SAAS;YACtB,KAAK;gBAAU;oBACb,MAAMC,eAAeD;oBACrBnB,MAAM,4BAA4BoB;oBAClC,OAAO;wBACL;4BACE,MAAM;4BACN,OAAO,CAAC;4BACR,SAASA,aAAa,OAAO;wBAC/B;qBACD;gBACH;YACA,KAAK;gBAAM;oBACT,MAAMC,WAAWF;oBAejB,OAASE,SAAiB,MAAM;wBAC9B,KAAK;4BAAO;gCACV,MAAMC,YAAYD;gCAClBrB,MAAM,yBAAyBsB;gCAC/B,MAAMC,SAASpB,+BACbmB,UAAU,OAAO,EACjBjB;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMC,kBAAkBH;gCACxBrB,MAAM,gCAAgCwB;gCACtC,MAAMD,SAASpB,+BACbqB,gBAAgB,OAAO,EACvBnB;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAME,aAAaJ;gCACnBrB,MAAM,0BAA0ByB;gCAEhC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,OAAOA,WAAW,IAAI;wCACxB;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAS;gCACZ,MAAMC,cAAcL;gCACpBrB,MAAM,2BAA2B0B;gCAEjC,MAAMH,SAASpB,+BACbuB,YAAY,KAAK,EACjBrB;gCAIF,MAAMsB,SAASD,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCACxD,MAAME,SAASF,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCAGxD,IAAIG;gCACJ,IAAIC;gCAEJ,MAAMC,YAAYC,KAAK,GAAG,CAACL;gCAC3B,MAAMM,YAAYD,KAAK,GAAG,CAACJ;gCAE3B,IAAIK,YAAYF,WAAW;oCAEzBD,WAAWE,KAAK,KAAK,CAClBC,YAAY5B,KAAK,MAAM,GAAIH;oCAE9B2B,YAAYD,SAAS,IAAI,OAAO;gCAClC,OAAO;oCAELE,WAAWE,KAAK,KAAK,CAClBD,YAAY1B,KAAK,KAAK,GAAIH;oCAE7B2B,YAAYF,SAAS,IAAI,SAAS;gCACpC;gCAEA3B,MACE,CAAC,2BAA2B,EAAE6B,UAAU,YAAY,EAAEC,UAAU;gCAGlE,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLP;4CAEAO;4CACAD;wCACF;wCACA,SAASH,YAAY,KAAK,IAAI;oCAChC;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMQ,kBAAkBb;gCACxBrB,MAAM,gCAAgCkC;gCACtC,MAAMX,SAASpB,+BACb+B,gBAAgB,OAAO,EACvB7B;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLkB;wCACF;wCACA,SAASW,gBAAgB,KAAK,IAAI;oCACpC;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAad;gCACnBrB,MAAM,0BAA0BmC;gCAChC,OAAO;oCACL;wCACE,MAAMvB,eACJC,aACAH,mBACA;wCAEF,OAAO,CAAC;wCACR,SAASyB,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAaf;gCACnBrB,MAAM,0BAA0BoC;gCAChC,OAAO;oCACL;wCACE,MAAMxB,eACJC,aACAF,mBACA;wCAEF,OAAO,CAAC;wCACR,SAASyB,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAahB;gCACnBrB,MAAM,0BAA0BqC;gCAChC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,QAAQA,WAAW,UAAU;wCAC/B;wCACA,SAASA,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAU;gCACb,MAAMC,eAAejB;gCACrBrB,MAAM,4BAA4BsC;gCAClC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CAAE,KAAKA,aAAa,GAAG;wCAAC;wCAC/B,SAASA,aAAa,KAAK,IAAI;oCACjC;iCACD;4BACH;wBACA,KAAK;4BACH,MAAM,IAAIC,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ;4BACE,MAAM,IAAIA,MACR,CAAC,0BAA0B,EAAGlB,SAAiB,MAAM,EAAE;oBAE7D;gBACF;YACA;gBACE,MAAM,IAAIkB,MACR,CAAC,yBAAyB,EAAGpB,OAAe,SAAS,EAAE;QAE7D;IACF,EAAE,OAAOqB,OAAO;QACd,MAAMC,eAAeD,iBAAiBD,QAAQC,MAAM,OAAO,GAAGE,OAAOF;QACrExC,MAAM,oBAAoByC;QAC1B,MAAM,IAAIF,MAAM,CAAC,4BAA4B,EAAEE,cAAc;IAC/D;AACF"}
@@ -0,0 +1,79 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ autoGlmAdapters: ()=>autoGlmAdapters
28
+ });
29
+ const external_locate_js_namespaceObject = require("./locate.js");
30
+ const external_planning_js_namespaceObject = require("./planning.js");
31
+ const external_prompt_js_namespaceObject = require("./prompt.js");
32
+ const defaultAutoGlmReplanningCycleLimit = 100;
33
+ function createAutoGlmAdapter({ getPlanPrompt, getLocatePrompt }) {
34
+ return {
35
+ chatCompletion: {
36
+ unsupportedUserConfig: [
37
+ 'reasoningEnabled',
38
+ 'reasoningEffort',
39
+ 'reasoningBudget'
40
+ ],
41
+ buildChatCompletionParams: ({ midsceneDefaults, userConfig })=>({
42
+ config: {
43
+ temperature: userConfig.temperature ?? midsceneDefaults.temperature,
44
+ top_p: 0.85,
45
+ frequency_penalty: 0.2
46
+ }
47
+ })
48
+ },
49
+ planning: {
50
+ kind: 'custom',
51
+ cacheEnabled: false,
52
+ defaultReplanningCycleLimit: defaultAutoGlmReplanningCycleLimit,
53
+ planFn: (userInstruction, options)=>(0, external_planning_js_namespaceObject.autoGlmPlanning)(userInstruction, options, getPlanPrompt)
54
+ },
55
+ locate: {
56
+ kind: 'custom',
57
+ locateFn: (elementDescription, options)=>(0, external_locate_js_namespaceObject.autoGlmLocate)(elementDescription, options, getLocatePrompt)
58
+ }
59
+ };
60
+ }
61
+ const autoGlmAdapters = {
62
+ 'auto-glm': createAutoGlmAdapter({
63
+ getPlanPrompt: external_prompt_js_namespaceObject.getAutoGLMChinesePlanPrompt,
64
+ getLocatePrompt: external_prompt_js_namespaceObject.getAutoGLMChineseLocatePrompt
65
+ }),
66
+ 'auto-glm-multilingual': createAutoGlmAdapter({
67
+ getPlanPrompt: external_prompt_js_namespaceObject.getAutoGLMMultilingualPlanPrompt,
68
+ getLocatePrompt: external_prompt_js_namespaceObject.getAutoGLMMultilingualLocatePrompt
69
+ })
70
+ };
71
+ exports.autoGlmAdapters = __webpack_exports__.autoGlmAdapters;
72
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
73
+ "autoGlmAdapters"
74
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
75
+ Object.defineProperty(exports, '__esModule', {
76
+ value: true
77
+ });
78
+
79
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/auto-glm/adapter.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../../src/ai-model/models/auto-glm/adapter.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TModelFamily } from '@midscene/shared/env';\nimport type { ModelAdapterDefinition } from '../types';\nimport { autoGlmLocate } from './locate';\nimport { autoGlmPlanning } from './planning';\nimport {\n getAutoGLMChineseLocatePrompt,\n getAutoGLMChinesePlanPrompt,\n getAutoGLMMultilingualLocatePrompt,\n getAutoGLMMultilingualPlanPrompt,\n} from './prompt';\n\nconst defaultAutoGlmReplanningCycleLimit = 100;\n\nfunction createAutoGlmAdapter({\n getPlanPrompt,\n getLocatePrompt,\n}: {\n getPlanPrompt: () => string;\n getLocatePrompt: () => string;\n}): ModelAdapterDefinition {\n return {\n chatCompletion: {\n unsupportedUserConfig: [\n 'reasoningEnabled',\n 'reasoningEffort',\n 'reasoningBudget',\n ],\n buildChatCompletionParams: ({ midsceneDefaults, userConfig }) => ({\n config: {\n temperature: userConfig.temperature ?? midsceneDefaults.temperature,\n top_p: 0.85,\n frequency_penalty: 0.2,\n },\n }),\n },\n planning: {\n kind: 'custom',\n cacheEnabled: false,\n defaultReplanningCycleLimit: defaultAutoGlmReplanningCycleLimit,\n planFn: (userInstruction, options) =>\n autoGlmPlanning(userInstruction, options, getPlanPrompt),\n },\n locate: {\n kind: 'custom',\n locateFn: (elementDescription, options) =>\n autoGlmLocate(elementDescription, options, getLocatePrompt),\n },\n };\n}\n\nexport const autoGlmAdapters = {\n 'auto-glm': createAutoGlmAdapter({\n getPlanPrompt: getAutoGLMChinesePlanPrompt,\n getLocatePrompt: getAutoGLMChineseLocatePrompt,\n }),\n 'auto-glm-multilingual': createAutoGlmAdapter({\n getPlanPrompt: getAutoGLMMultilingualPlanPrompt,\n getLocatePrompt: getAutoGLMMultilingualLocatePrompt,\n }),\n} satisfies Pick<\n Record<TModelFamily, ModelAdapterDefinition>,\n 'auto-glm' | 'auto-glm-multilingual'\n>;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","defaultAutoGlmReplanningCycleLimit","createAutoGlmAdapter","getPlanPrompt","getLocatePrompt","midsceneDefaults","userConfig","userInstruction","options","autoGlmPlanning","elementDescription","autoGlmLocate","autoGlmAdapters","getAutoGLMChinesePlanPrompt","getAutoGLMChineseLocatePrompt","getAutoGLMMultilingualPlanPrompt","getAutoGLMMultilingualLocatePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACKA,MAAMI,qCAAqC;AAE3C,SAASC,qBAAqB,EAC5BC,aAAa,EACbC,eAAe,EAIhB;IACC,OAAO;QACL,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;aACD;YACD,2BAA2B,CAAC,EAAEC,gBAAgB,EAAEC,UAAU,EAAE,GAAM;oBAChE,QAAQ;wBACN,aAAaA,WAAW,WAAW,IAAID,iBAAiB,WAAW;wBACnE,OAAO;wBACP,mBAAmB;oBACrB;gBACF;QACF;QACA,UAAU;YACR,MAAM;YACN,cAAc;YACd,6BAA6BJ;YAC7B,QAAQ,CAACM,iBAAiBC,UACxBC,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBF,iBAAiBC,SAASL;QAC9C;QACA,QAAQ;YACN,MAAM;YACN,UAAU,CAACO,oBAAoBF,UAC7BG,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EAAcD,oBAAoBF,SAASJ;QAC/C;IACF;AACF;AAEO,MAAMQ,kBAAkB;IAC7B,YAAYV,qBAAqB;QAC/B,eAAeW,mCAAAA,2BAA2BA;QAC1C,iBAAiBC,mCAAAA,6BAA6BA;IAChD;IACA,yBAAyBZ,qBAAqB;QAC5C,eAAea,mCAAAA,gCAAgCA;QAC/C,iBAAiBC,mCAAAA,kCAAkCA;IACrD;AACF"}
@@ -0,0 +1,146 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ autoGlmLocate: ()=>autoGlmLocate
28
+ });
29
+ const extractor_namespaceObject = require("@midscene/shared/extractor");
30
+ const logger_namespaceObject = require("@midscene/shared/logger");
31
+ const utils_namespaceObject = require("@midscene/shared/utils");
32
+ const external_inspect_js_namespaceObject = require("../../inspect.js");
33
+ const llm_locator_js_namespaceObject = require("../../prompt/llm-locator.js");
34
+ const index_js_namespaceObject = require("../../service-caller/index.js");
35
+ const bbox_js_namespaceObject = require("../../shared/model-locate-result/bbox.js");
36
+ const pixel_bbox_mapper_js_namespaceObject = require("../../shared/model-locate-result/pixel-bbox-mapper.js");
37
+ const locate_result_rect_js_namespaceObject = require("../../workflows/inspect/locate-result-rect.js");
38
+ const search_area_mapping_js_namespaceObject = require("../../workflows/inspect/search-area-mapping.js");
39
+ const external_parser_js_namespaceObject = require("./parser.js");
40
+ const debugInspect = (0, logger_namespaceObject.getDebug)('ai:inspect');
41
+ async function autoGlmLocate(elementDescription, options, getSystemPrompt) {
42
+ const { context, modelRuntime } = options;
43
+ const screenshotBase64 = context.screenshot.base64;
44
+ (0, utils_namespaceObject.assert)(elementDescription, "cannot find the target element description");
45
+ const elementDescriptionText = (0, external_inspect_js_namespaceObject.extraTextFromUserPrompt)(elementDescription);
46
+ const userInstructionPrompt = (0, llm_locator_js_namespaceObject.findElementPrompt)(elementDescriptionText);
47
+ const locateImage = options.searchConfig?.image ?? {
48
+ imageBase64: screenshotBase64,
49
+ width: context.shotSize.width,
50
+ height: context.shotSize.height
51
+ };
52
+ const imagePayload = locateImage.imageBase64;
53
+ const imageWidth = locateImage.width;
54
+ const imageHeight = locateImage.height;
55
+ const msgs = [
56
+ {
57
+ role: 'system',
58
+ content: getSystemPrompt()
59
+ },
60
+ {
61
+ role: 'user',
62
+ content: [
63
+ {
64
+ type: 'image_url',
65
+ image_url: {
66
+ url: imagePayload,
67
+ detail: 'high'
68
+ }
69
+ },
70
+ {
71
+ type: 'text',
72
+ text: `Tap: ${userInstructionPrompt}`
73
+ }
74
+ ]
75
+ }
76
+ ];
77
+ if ('string' != typeof elementDescription) {
78
+ const addOns = await (0, external_inspect_js_namespaceObject.promptsToChatParam)({
79
+ images: elementDescription.images,
80
+ convertHttpImage2Base64: elementDescription.convertHttpImage2Base64
81
+ });
82
+ msgs.push(...addOns);
83
+ }
84
+ const { content: rawResponseContent, usage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, modelRuntime, {
85
+ abortSignal: options.abortSignal
86
+ });
87
+ debugInspect('auto-glm rawResponse:', rawResponseContent);
88
+ const parsed = (0, external_parser_js_namespaceObject.parseAutoGLMLocateResponse)(rawResponseContent);
89
+ debugInspect('auto-glm thinking:', parsed.think);
90
+ debugInspect('auto-glm coordinates:', parsed.coordinates);
91
+ let resRect;
92
+ let matchedElement;
93
+ let errors = [];
94
+ if (parsed.error || !parsed.coordinates) {
95
+ errors = [
96
+ parsed.error || 'Failed to parse auto-glm response'
97
+ ];
98
+ debugInspect('auto-glm parse error:', errors[0]);
99
+ } else {
100
+ const { x, y } = parsed.coordinates;
101
+ debugInspect('auto-glm coordinates [0-999]:', {
102
+ x,
103
+ y
104
+ });
105
+ const ctx = {
106
+ preparedSize: {
107
+ width: imageWidth,
108
+ height: imageHeight
109
+ }
110
+ };
111
+ const targetPixelBbox = (0, bbox_js_namespaceObject.finalizePixelBbox)((0, pixel_bbox_mapper_js_namespaceObject.mapLocateResultToPixelBboxByCoordinates)({
112
+ type: 'point',
113
+ coordinates: [
114
+ x,
115
+ y
116
+ ]
117
+ }, ctx, {
118
+ shape: 'point',
119
+ order: 'xy',
120
+ normalizedBy: 1000
121
+ }), parsed.coordinates, ctx);
122
+ resRect = (0, locate_result_rect_js_namespaceObject.pixelBboxToRect)((0, search_area_mapping_js_namespaceObject.mapSearchAreaPixelBboxToOriginalPixelBbox)(targetPixelBbox, options.searchConfig?.mapping));
123
+ debugInspect('auto-glm resRect:', resRect);
124
+ const element = (0, extractor_namespaceObject.generateElementByRect)(resRect, elementDescriptionText);
125
+ if (element) matchedElement = element;
126
+ }
127
+ return {
128
+ rect: resRect,
129
+ parseResult: {
130
+ element: matchedElement,
131
+ errors
132
+ },
133
+ rawResponse: rawResponseContent,
134
+ usage,
135
+ reasoning_content: parsed.think
136
+ };
137
+ }
138
+ exports.autoGlmLocate = __webpack_exports__.autoGlmLocate;
139
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
140
+ "autoGlmLocate"
141
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
142
+ Object.defineProperty(exports, '__esModule', {
143
+ value: true
144
+ });
145
+
146
+ //# sourceMappingURL=locate.js.map