@midscene/core 1.8.10 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (304) hide show
  1. package/dist/es/agent/agent.mjs +40 -50
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +39 -19
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +24 -22
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +11 -14
  8. package/dist/es/agent/utils.mjs.map +1 -1
  9. package/dist/es/ai-model/connectivity.mjs +7 -3
  10. package/dist/es/ai-model/connectivity.mjs.map +1 -1
  11. package/dist/es/ai-model/errors.mjs +9 -0
  12. package/dist/es/ai-model/errors.mjs.map +1 -0
  13. package/dist/es/ai-model/index.mjs +3 -4
  14. package/dist/es/ai-model/inspect.mjs +132 -144
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +46 -28
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/{auto-glm → models/auto-glm}/actions.mjs +22 -44
  19. package/dist/es/ai-model/models/auto-glm/actions.mjs.map +1 -0
  20. package/dist/es/ai-model/models/auto-glm/adapter.mjs +45 -0
  21. package/dist/es/ai-model/models/auto-glm/adapter.mjs.map +1 -0
  22. package/dist/es/ai-model/models/auto-glm/locate.mjs +112 -0
  23. package/dist/es/ai-model/models/auto-glm/locate.mjs.map +1 -0
  24. package/dist/es/ai-model/models/auto-glm/parser.mjs.map +1 -0
  25. package/dist/es/ai-model/{auto-glm → models/auto-glm}/planning.mjs +6 -7
  26. package/dist/es/ai-model/models/auto-glm/planning.mjs.map +1 -0
  27. package/dist/es/ai-model/{auto-glm → models/auto-glm}/prompt.mjs +3 -11
  28. package/dist/es/ai-model/models/auto-glm/prompt.mjs.map +1 -0
  29. package/dist/es/ai-model/models/default.mjs +12 -0
  30. package/dist/es/ai-model/models/default.mjs.map +1 -0
  31. package/dist/es/ai-model/models/doubao.mjs +138 -0
  32. package/dist/es/ai-model/models/doubao.mjs.map +1 -0
  33. package/dist/es/ai-model/models/gemini.mjs +34 -0
  34. package/dist/es/ai-model/models/gemini.mjs.map +1 -0
  35. package/dist/es/ai-model/models/glm.mjs +37 -0
  36. package/dist/es/ai-model/models/glm.mjs.map +1 -0
  37. package/dist/es/ai-model/models/gpt.mjs +31 -0
  38. package/dist/es/ai-model/models/gpt.mjs.map +1 -0
  39. package/dist/es/ai-model/models/index.mjs +2 -0
  40. package/dist/es/ai-model/models/qwen.mjs +113 -0
  41. package/dist/es/ai-model/models/qwen.mjs.map +1 -0
  42. package/dist/es/ai-model/models/registry.mjs +45 -0
  43. package/dist/es/ai-model/models/registry.mjs.map +1 -0
  44. package/dist/es/ai-model/models/resolved.mjs +104 -0
  45. package/dist/es/ai-model/models/resolved.mjs.map +1 -0
  46. package/dist/es/ai-model/models/types.mjs +0 -0
  47. package/dist/es/ai-model/models/ui-tars/adapter.mjs +142 -0
  48. package/dist/es/ai-model/models/ui-tars/adapter.mjs.map +1 -0
  49. package/dist/es/ai-model/{ui-tars-planning.mjs → models/ui-tars/planning.mjs} +44 -62
  50. package/dist/es/ai-model/models/ui-tars/planning.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/extraction.mjs +3 -3
  52. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  53. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -11
  54. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  55. package/dist/es/ai-model/prompt/llm-planning.mjs +25 -60
  56. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  57. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -10
  58. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  59. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs +9 -0
  60. package/dist/es/ai-model/prompt/locate-grounding-rules.mjs.map +1 -0
  61. package/dist/es/ai-model/prompt/locate-param-example.mjs +15 -0
  62. package/dist/es/ai-model/prompt/locate-param-example.mjs.map +1 -0
  63. package/dist/es/ai-model/prompt/playwright-generator.mjs +5 -5
  64. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  65. package/dist/es/ai-model/prompt/yaml-generator.mjs +5 -5
  66. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  67. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs +107 -0
  68. package/dist/es/ai-model/prompts/locate-result-coordinates.mjs.map +1 -0
  69. package/dist/es/ai-model/service-caller/index.mjs +59 -190
  70. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  71. package/dist/es/ai-model/service-caller/json.mjs +60 -0
  72. package/dist/es/ai-model/service-caller/json.mjs.map +1 -0
  73. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs +68 -0
  74. package/dist/es/ai-model/shared/model-locate-result/bbox.mjs.map +1 -0
  75. package/dist/es/ai-model/shared/model-locate-result/factory.mjs +96 -0
  76. package/dist/es/ai-model/shared/model-locate-result/factory.mjs.map +1 -0
  77. package/dist/es/ai-model/shared/model-locate-result/index.mjs +3 -0
  78. package/dist/es/ai-model/shared/model-locate-result/parse.mjs +41 -0
  79. package/dist/es/ai-model/shared/model-locate-result/parse.mjs.map +1 -0
  80. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs +64 -0
  81. package/dist/es/ai-model/shared/model-locate-result/pixel-bbox-mapper.mjs.map +1 -0
  82. package/dist/es/ai-model/shared/model-locate-result/types.mjs +0 -0
  83. package/dist/es/ai-model/types.mjs +0 -0
  84. package/dist/es/ai-model/workflows/image-preprocess.mjs +27 -0
  85. package/dist/es/ai-model/workflows/image-preprocess.mjs.map +1 -0
  86. package/dist/es/ai-model/workflows/inspect/index.mjs +2 -0
  87. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs +23 -0
  88. package/dist/es/ai-model/workflows/inspect/locate-result-rect.mjs.map +1 -0
  89. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs +18 -0
  90. package/dist/es/ai-model/workflows/inspect/search-area-mapping.mjs.map +1 -0
  91. package/dist/es/ai-model/workflows/inspect/types.mjs +0 -0
  92. package/dist/es/ai-model/workflows/planning/index.mjs +5 -0
  93. package/dist/es/ai-model/workflows/planning/index.mjs.map +1 -0
  94. package/dist/es/ai-model/workflows/planning/types.mjs +0 -0
  95. package/dist/es/common.mjs +2 -174
  96. package/dist/es/common.mjs.map +1 -1
  97. package/dist/es/device/index.mjs.map +1 -1
  98. package/dist/es/service/index.mjs +96 -69
  99. package/dist/es/service/index.mjs.map +1 -1
  100. package/dist/es/types.mjs.map +1 -1
  101. package/dist/es/utils.mjs +2 -2
  102. package/dist/es/yaml/player.mjs +4 -3
  103. package/dist/es/yaml/player.mjs.map +1 -1
  104. package/dist/lib/agent/agent.js +43 -53
  105. package/dist/lib/agent/agent.js.map +1 -1
  106. package/dist/lib/agent/task-builder.js +38 -18
  107. package/dist/lib/agent/task-builder.js.map +1 -1
  108. package/dist/lib/agent/tasks.js +23 -21
  109. package/dist/lib/agent/tasks.js.map +1 -1
  110. package/dist/lib/agent/utils.js +17 -17
  111. package/dist/lib/agent/utils.js.map +1 -1
  112. package/dist/lib/ai-model/connectivity.js +7 -3
  113. package/dist/lib/ai-model/connectivity.js.map +1 -1
  114. package/dist/lib/ai-model/errors.js +46 -0
  115. package/dist/lib/ai-model/errors.js.map +1 -0
  116. package/dist/lib/ai-model/index.js +7 -14
  117. package/dist/lib/ai-model/inspect.js +141 -144
  118. package/dist/lib/ai-model/inspect.js.map +1 -1
  119. package/dist/lib/ai-model/llm-planning.js +44 -26
  120. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  121. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/actions.js +22 -44
  122. package/dist/lib/ai-model/models/auto-glm/actions.js.map +1 -0
  123. package/dist/lib/ai-model/models/auto-glm/adapter.js +79 -0
  124. package/dist/lib/ai-model/models/auto-glm/adapter.js.map +1 -0
  125. package/dist/lib/ai-model/models/auto-glm/locate.js +146 -0
  126. package/dist/lib/ai-model/models/auto-glm/locate.js.map +1 -0
  127. package/dist/lib/ai-model/models/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/planning.js +8 -9
  129. package/dist/lib/ai-model/models/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/{auto-glm → models/auto-glm}/prompt.js +14 -16
  131. package/dist/lib/ai-model/models/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/{auto-glm/util.js → models/default.js} +13 -13
  133. package/dist/lib/ai-model/models/default.js.map +1 -0
  134. package/dist/lib/ai-model/models/doubao.js +184 -0
  135. package/dist/lib/ai-model/models/doubao.js.map +1 -0
  136. package/dist/lib/ai-model/models/gemini.js +68 -0
  137. package/dist/lib/ai-model/models/gemini.js.map +1 -0
  138. package/dist/lib/ai-model/models/glm.js +71 -0
  139. package/dist/lib/ai-model/models/glm.js.map +1 -0
  140. package/dist/lib/ai-model/models/gpt.js +65 -0
  141. package/dist/lib/ai-model/models/gpt.js.map +1 -0
  142. package/dist/lib/ai-model/{service-caller/image-detail.js → models/index.js} +8 -7
  143. package/dist/lib/ai-model/models/index.js.map +1 -0
  144. package/dist/lib/ai-model/models/qwen.js +147 -0
  145. package/dist/lib/ai-model/models/qwen.js.map +1 -0
  146. package/dist/lib/ai-model/models/registry.js +85 -0
  147. package/dist/lib/ai-model/models/registry.js.map +1 -0
  148. package/dist/lib/ai-model/models/resolved.js +138 -0
  149. package/dist/lib/ai-model/models/resolved.js.map +1 -0
  150. package/dist/lib/ai-model/models/types.js +20 -0
  151. package/dist/lib/ai-model/models/types.js.map +1 -0
  152. package/dist/lib/ai-model/models/ui-tars/adapter.js +176 -0
  153. package/dist/lib/ai-model/models/ui-tars/adapter.js.map +1 -0
  154. package/dist/lib/ai-model/{ui-tars-planning.js → models/ui-tars/planning.js} +44 -62
  155. package/dist/lib/ai-model/models/ui-tars/planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/extraction.js +3 -3
  157. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  158. package/dist/lib/ai-model/prompt/llm-locator.js +11 -11
  159. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  160. package/dist/lib/ai-model/prompt/llm-planning.js +25 -60
  161. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  162. package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -10
  163. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  164. package/dist/lib/ai-model/prompt/locate-grounding-rules.js +43 -0
  165. package/dist/lib/ai-model/prompt/locate-grounding-rules.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/locate-param-example.js +52 -0
  167. package/dist/lib/ai-model/prompt/locate-param-example.js.map +1 -0
  168. package/dist/lib/ai-model/prompt/playwright-generator.js +5 -5
  169. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  170. package/dist/lib/ai-model/prompt/yaml-generator.js +5 -5
  171. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  172. package/dist/lib/ai-model/prompts/locate-result-coordinates.js +150 -0
  173. package/dist/lib/ai-model/prompts/locate-result-coordinates.js.map +1 -0
  174. package/dist/lib/ai-model/service-caller/index.js +68 -199
  175. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  176. package/dist/lib/ai-model/service-caller/json.js +100 -0
  177. package/dist/lib/ai-model/service-caller/json.js.map +1 -0
  178. package/dist/lib/ai-model/shared/model-locate-result/bbox.js +117 -0
  179. package/dist/lib/ai-model/shared/model-locate-result/bbox.js.map +1 -0
  180. package/dist/lib/ai-model/shared/model-locate-result/factory.js +130 -0
  181. package/dist/lib/ai-model/shared/model-locate-result/factory.js.map +1 -0
  182. package/dist/lib/ai-model/{prompt/common.js → shared/model-locate-result/index.js} +9 -9
  183. package/dist/lib/ai-model/shared/model-locate-result/index.js.map +1 -0
  184. package/dist/lib/ai-model/shared/model-locate-result/parse.js +78 -0
  185. package/dist/lib/ai-model/shared/model-locate-result/parse.js.map +1 -0
  186. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js +98 -0
  187. package/dist/lib/ai-model/shared/model-locate-result/pixel-bbox-mapper.js.map +1 -0
  188. package/dist/lib/ai-model/shared/model-locate-result/types.js +20 -0
  189. package/dist/lib/ai-model/shared/model-locate-result/types.js.map +1 -0
  190. package/dist/lib/ai-model/types.js +20 -0
  191. package/dist/lib/ai-model/types.js.map +1 -0
  192. package/dist/lib/ai-model/workflows/image-preprocess.js +61 -0
  193. package/dist/lib/ai-model/workflows/image-preprocess.js.map +1 -0
  194. package/dist/lib/ai-model/workflows/inspect/index.js +50 -0
  195. package/dist/lib/ai-model/workflows/inspect/index.js.map +1 -0
  196. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js +60 -0
  197. package/dist/lib/ai-model/workflows/inspect/locate-result-rect.js.map +1 -0
  198. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js +52 -0
  199. package/dist/lib/ai-model/workflows/inspect/search-area-mapping.js.map +1 -0
  200. package/dist/lib/ai-model/workflows/inspect/types.js +20 -0
  201. package/dist/lib/ai-model/workflows/inspect/types.js.map +1 -0
  202. package/dist/lib/ai-model/{model-family.js → workflows/planning/index.js} +6 -7
  203. package/dist/lib/ai-model/workflows/planning/index.js.map +1 -0
  204. package/dist/lib/ai-model/workflows/planning/types.js +20 -0
  205. package/dist/lib/ai-model/workflows/planning/types.js.map +1 -0
  206. package/dist/lib/common.js +4 -206
  207. package/dist/lib/common.js.map +1 -1
  208. package/dist/lib/device/index.js.map +1 -1
  209. package/dist/lib/service/index.js +96 -69
  210. package/dist/lib/service/index.js.map +1 -1
  211. package/dist/lib/types.js.map +1 -1
  212. package/dist/lib/utils.js +2 -2
  213. package/dist/lib/yaml/player.js +4 -3
  214. package/dist/lib/yaml/player.js.map +1 -1
  215. package/dist/types/agent/agent.d.ts +14 -6
  216. package/dist/types/agent/task-builder.d.ts +2 -2
  217. package/dist/types/agent/tasks.d.ts +6 -6
  218. package/dist/types/agent/utils.d.ts +8 -5
  219. package/dist/types/ai-model/errors.d.ts +2 -0
  220. package/dist/types/ai-model/index.d.ts +2 -4
  221. package/dist/types/ai-model/inspect.d.ts +13 -33
  222. package/dist/types/ai-model/llm-planning.d.ts +6 -17
  223. package/dist/types/ai-model/{auto-glm → models/auto-glm}/actions.d.ts +2 -2
  224. package/dist/types/ai-model/models/auto-glm/adapter.d.ts +5 -0
  225. package/dist/types/ai-model/models/auto-glm/locate.d.ts +3 -0
  226. package/dist/types/ai-model/models/auto-glm/planning.d.ts +3 -0
  227. package/dist/types/ai-model/models/auto-glm/prompt.d.ts +4 -0
  228. package/dist/types/ai-model/models/default.d.ts +2 -0
  229. package/dist/types/ai-model/models/doubao.d.ts +10 -0
  230. package/dist/types/ai-model/models/gemini.d.ts +18 -0
  231. package/dist/types/ai-model/models/glm.d.ts +18 -0
  232. package/dist/types/ai-model/models/gpt.d.ts +18 -0
  233. package/dist/types/ai-model/models/index.d.ts +2 -0
  234. package/dist/types/ai-model/models/qwen.d.ts +30 -0
  235. package/dist/types/ai-model/models/registry.d.ts +81 -0
  236. package/dist/types/ai-model/models/resolved.d.ts +9 -0
  237. package/dist/types/ai-model/models/types.d.ts +102 -0
  238. package/dist/types/ai-model/models/ui-tars/adapter.d.ts +6 -0
  239. package/dist/types/ai-model/{ui-tars-planning.d.ts → models/ui-tars/planning.d.ts} +7 -11
  240. package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
  241. package/dist/types/ai-model/prompt/llm-planning.d.ts +5 -5
  242. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
  243. package/dist/types/ai-model/prompt/locate-grounding-rules.d.ts +1 -0
  244. package/dist/types/ai-model/prompt/locate-param-example.d.ts +3 -0
  245. package/dist/types/ai-model/prompt/playwright-generator.d.ts +3 -3
  246. package/dist/types/ai-model/prompt/yaml-generator.d.ts +3 -3
  247. package/dist/types/ai-model/prompts/locate-result-coordinates.d.ts +6 -0
  248. package/dist/types/ai-model/service-caller/index.d.ts +19 -27
  249. package/dist/types/ai-model/service-caller/json.d.ts +9 -0
  250. package/dist/types/ai-model/shared/model-locate-result/bbox.d.ts +7 -0
  251. package/dist/types/ai-model/shared/model-locate-result/factory.d.ts +2 -0
  252. package/dist/types/ai-model/shared/model-locate-result/index.d.ts +3 -0
  253. package/dist/types/ai-model/shared/model-locate-result/parse.d.ts +5 -0
  254. package/dist/types/ai-model/shared/model-locate-result/pixel-bbox-mapper.d.ts +7 -0
  255. package/dist/types/ai-model/shared/model-locate-result/types.d.ts +157 -0
  256. package/dist/types/ai-model/types.d.ts +2 -0
  257. package/dist/types/ai-model/workflows/image-preprocess.d.ts +30 -0
  258. package/dist/types/ai-model/workflows/inspect/index.d.ts +1 -0
  259. package/dist/types/ai-model/workflows/inspect/locate-result-rect.d.ts +4 -0
  260. package/dist/types/ai-model/workflows/inspect/search-area-mapping.d.ts +3 -0
  261. package/dist/types/ai-model/workflows/inspect/types.d.ts +37 -0
  262. package/dist/types/ai-model/workflows/planning/index.d.ts +2 -0
  263. package/dist/types/ai-model/workflows/planning/types.d.ts +15 -0
  264. package/dist/types/common.d.ts +0 -30
  265. package/dist/types/device/index.d.ts +22 -22
  266. package/dist/types/service/index.d.ts +5 -4
  267. package/dist/types/types.d.ts +21 -9
  268. package/dist/types/yaml.d.ts +8 -2
  269. package/package.json +2 -2
  270. package/dist/es/ai-model/auto-glm/actions.mjs.map +0 -1
  271. package/dist/es/ai-model/auto-glm/index.mjs +0 -6
  272. package/dist/es/ai-model/auto-glm/parser.mjs.map +0 -1
  273. package/dist/es/ai-model/auto-glm/planning.mjs.map +0 -1
  274. package/dist/es/ai-model/auto-glm/prompt.mjs.map +0 -1
  275. package/dist/es/ai-model/auto-glm/util.mjs +0 -9
  276. package/dist/es/ai-model/auto-glm/util.mjs.map +0 -1
  277. package/dist/es/ai-model/model-family.mjs +0 -6
  278. package/dist/es/ai-model/model-family.mjs.map +0 -1
  279. package/dist/es/ai-model/prompt/common.mjs +0 -8
  280. package/dist/es/ai-model/prompt/common.mjs.map +0 -1
  281. package/dist/es/ai-model/service-caller/image-detail.mjs +0 -6
  282. package/dist/es/ai-model/service-caller/image-detail.mjs.map +0 -1
  283. package/dist/es/ai-model/ui-tars-planning.mjs.map +0 -1
  284. package/dist/lib/ai-model/auto-glm/actions.js.map +0 -1
  285. package/dist/lib/ai-model/auto-glm/index.js +0 -66
  286. package/dist/lib/ai-model/auto-glm/index.js.map +0 -1
  287. package/dist/lib/ai-model/auto-glm/parser.js.map +0 -1
  288. package/dist/lib/ai-model/auto-glm/planning.js.map +0 -1
  289. package/dist/lib/ai-model/auto-glm/prompt.js.map +0 -1
  290. package/dist/lib/ai-model/auto-glm/util.js.map +0 -1
  291. package/dist/lib/ai-model/model-family.js.map +0 -1
  292. package/dist/lib/ai-model/prompt/common.js.map +0 -1
  293. package/dist/lib/ai-model/service-caller/image-detail.js.map +0 -1
  294. package/dist/lib/ai-model/ui-tars-planning.js.map +0 -1
  295. package/dist/types/ai-model/auto-glm/index.d.ts +0 -6
  296. package/dist/types/ai-model/auto-glm/planning.d.ts +0 -12
  297. package/dist/types/ai-model/auto-glm/prompt.d.ts +0 -27
  298. package/dist/types/ai-model/auto-glm/util.d.ts +0 -13
  299. package/dist/types/ai-model/model-family.d.ts +0 -7
  300. package/dist/types/ai-model/prompt/common.d.ts +0 -2
  301. package/dist/types/ai-model/service-caller/image-detail.d.ts +0 -2
  302. /package/dist/es/ai-model/{auto-glm → models/auto-glm}/parser.mjs +0 -0
  303. /package/dist/lib/ai-model/{auto-glm → models/auto-glm}/parser.js +0 -0
  304. /package/dist/types/ai-model/{auto-glm → models/auto-glm}/parser.d.ts +0 -0
@@ -0,0 +1,113 @@
1
+ import { unwrapCoordinateListLikeInput } from "../shared/model-locate-result/index.mjs";
2
// Edge length added to a point to form a square box when the model
// returns only a single point instead of a full bounding box.
const defaultBboxSize = 20;

/**
 * Build a bounding box anchored at the given top-left point.
 *
 * @param {number} x - Left coordinate of the point.
 * @param {number} y - Top coordinate of the point.
 * @returns {number[]} `[left, top, right, bottom]`, each value rounded with
 *   `Math.round`; right/bottom are the point offset by `defaultBboxSize`.
 */
function topLeftPointToPixelBbox(x, y) {
  // The offset is added before rounding, exactly as the corners are defined.
  const corners = [x, y, x + defaultBboxSize, y + defaultBboxSize];
  return corners.map((value) => Math.round(value));
}
11
/**
 * Convert a raw qwen2.5-vl locate value into a typed locate result.
 *
 * The input is first passed through `unwrapCoordinateListLikeInput`; the
 * resulting list is then classified by how many numeric entries it carries.
 *
 * @param {unknown} input - Raw locate value from the model response.
 * @returns {{type: 'bbox', coordinates: number[]} | {type: 'point', coordinates: number[]}}
 *   A `bbox` result when entries 2 and 3 are both numbers, otherwise a
 *   `point` result built from the first two entries.
 * @throws {Error} When the unwrapped list has fewer than two entries.
 */
function parseQwen25RawLocateValue(input) {
  const values = unwrapCoordinateListLikeInput(input);

  if (values.length < 2) {
    throw new Error(`invalid bbox data for qwen-vl mode: ${JSON.stringify(values)} `);
  }

  const hasSecondCorner =
    typeof values[2] === 'number' && typeof values[3] === 'number';

  if (!hasSecondCorner) {
    // Only a point is available; the caller decides how to expand it.
    return { type: 'point', coordinates: [values[0], values[1]] };
  }

  return {
    type: 'bbox',
    coordinates: [values[0], values[1], values[2], values[3]],
  };
}
34
/**
 * Turn a qwen2.5-vl locate result into a rounded `[x1, y1, x2, y2]` box.
 *
 * @param {{type: string, coordinates: number[]}} result - Parsed locate result.
 * @returns {number[]} For `bbox` results, the four coordinates rounded with
 *   `Math.round`; for any other type, the box produced by
 *   `topLeftPointToPixelBbox` from the first two coordinates.
 */
function normalizeQwen25ResultToPixelBbox(result) {
  if (result.type !== 'bbox') {
    const [pointX, pointY] = [result.coordinates[0], result.coordinates[1]];
    return topLeftPointToPixelBbox(pointX, pointY);
  }

  // Always read exactly four slots so the output length stays fixed.
  return [0, 1, 2, 3].map((index) => Math.round(result.coordinates[index]));
}
43
/**
 * Build the chat-completion config for the qwen3-style adapter.
 *
 * @param {{midsceneDefaults: {temperature: number}, userConfig: object}} input
 *   Call context; `userConfig` may carry `temperature`, `reasoningEnabled`
 *   and `reasoningBudget`.
 * @returns {{config: object}} Config with `temperature` (user value, else the
 *   default), `enable_thinking` (false unless the user set it) and, only when
 *   a budget was supplied, `thinking_budget`.
 */
const buildQwenChatCompletionParams = (input) => {
  const { midsceneDefaults, userConfig } = input;
  const { reasoningEnabled, reasoningBudget } = userConfig;

  const config = {
    temperature: userConfig.temperature ?? midsceneDefaults.temperature,
    enable_thinking: reasoningEnabled ?? false,
    // A budget of 0 is still forwarded; only `undefined` is omitted.
    ...(reasoningBudget !== undefined ? { thinking_budget: reasoningBudget } : {}),
  };

  return { config };
};
56
/**
 * Build the chat-completion config for the qwen2.5-vl adapter.
 *
 * @param {{midsceneDefaults: {temperature: number}, userConfig: object}} input
 *   Call context supplying the user and default temperature.
 * @returns {{config: {temperature: number, vl_high_resolution_images: boolean}}}
 *   Config with the resolved temperature and `vl_high_resolution_images`
 *   always set to `true`.
 */
const buildQwen25ChatCompletionParams = (input) => {
  const { midsceneDefaults, userConfig } = input;
  const temperature = userConfig.temperature ?? midsceneDefaults.temperature;

  return {
    config: { temperature, vl_high_resolution_images: true },
  };
};
65
/**
 * Adapter definition shared by the qwen3-style model families.
 *
 * - `reasoningEffort` is listed as an unsupported user config.
 * - Locate results are declared as `bbox` coordinates in `xy` order with
 *   `normalizedBy: 1000`.
 */
const qwen3Adapter = {
  chatCompletion: {
    unsupportedUserConfig: ['reasoningEffort'],
    buildChatCompletionParams: buildQwenChatCompletionParams,
  },
  locate: {
    resultAdapter: {
      coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },
    },
  },
};
82
/**
 * Adapter definitions for the Qwen model families, keyed by family name.
 *
 * `qwen2.5-vl` has its own definition: all three reasoning options are listed
 * as unsupported, images are preprocessed with `padBlockSize: 28`, and locate
 * results go through the qwen2.5-specific parse/map functions (no
 * `normalizedBy` is declared for its coordinates). Every other family listed
 * here shares the single `qwen3Adapter` object.
 */
const qwenAdapters = {
  'qwen2.5-vl': {
    chatCompletion: {
      unsupportedUserConfig: ['reasoningEnabled', 'reasoningEffort', 'reasoningBudget'],
      buildChatCompletionParams: buildQwen25ChatCompletionParams,
    },
    imagePreprocess: { padBlockSize: 28 },
    locate: {
      resultAdapter: {
        coordinates: { shape: 'bbox', order: 'xy' },
        parseRawLocateValue: parseQwen25RawLocateValue,
        mapLocateResultToPixelBbox: normalizeQwen25ResultToPixelBbox,
      },
    },
  },
  'qwen3-vl': qwen3Adapter,
  qwen3: qwen3Adapter,
  'qwen3.5': qwen3Adapter,
  'qwen3.6': qwen3Adapter,
};
111
+ export { qwenAdapters };
112
+
113
+ //# sourceMappingURL=qwen.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/qwen.mjs","sources":["../../../../src/ai-model/models/qwen.ts"],"sourcesContent":["import type { TModelFamily } from '@midscene/shared/env';\nimport {\n type LocateResultValue,\n type PixelBbox,\n unwrapCoordinateListLikeInput,\n} from '../shared/model-locate-result';\nimport type {\n ChatCompletionCallContext,\n ChatCompletionParamsResult,\n ModelAdapterDefinition,\n} from './types';\n\nconst defaultBboxSize = 20;\n\nfunction topLeftPointToPixelBbox(x: number, y: number): PixelBbox {\n return [\n Math.round(x),\n Math.round(y),\n Math.round(x + defaultBboxSize),\n Math.round(y + defaultBboxSize),\n ];\n}\n\nfunction parseQwen25RawLocateValue(input: unknown): LocateResultValue {\n const bbox = unwrapCoordinateListLikeInput(input as any) as number[];\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n if (typeof bbox[2] === 'number' && typeof bbox[3] === 'number') {\n return {\n type: 'bbox',\n coordinates: [bbox[0], bbox[1], bbox[2], bbox[3]],\n };\n }\n\n return { type: 'point', coordinates: [bbox[0], bbox[1]] };\n}\n\nfunction normalizeQwen25ResultToPixelBbox(\n result: LocateResultValue,\n): PixelBbox {\n if (result.type === 'bbox') {\n return [\n Math.round(result.coordinates[0]),\n Math.round(result.coordinates[1]),\n Math.round(result.coordinates[2]),\n Math.round(result.coordinates[3]),\n ];\n }\n\n return topLeftPointToPixelBbox(result.coordinates[0], result.coordinates[1]);\n}\n\nconst buildQwenChatCompletionParams = (\n input: ChatCompletionCallContext,\n): ChatCompletionParamsResult => {\n const { midsceneDefaults, userConfig } = input;\n const { reasoningEnabled, reasoningBudget } = userConfig;\n const effectiveReasoningEnabled = reasoningEnabled ?? false;\n const config: Record<string, unknown> = {\n temperature: userConfig.temperature ?? 
midsceneDefaults.temperature,\n enable_thinking: effectiveReasoningEnabled,\n };\n\n if (reasoningBudget !== undefined) {\n config.thinking_budget = reasoningBudget;\n }\n\n return { config };\n};\n\nconst buildQwen25ChatCompletionParams = (\n input: ChatCompletionCallContext,\n): ChatCompletionParamsResult => {\n const { midsceneDefaults, userConfig } = input;\n return {\n config: {\n temperature: userConfig.temperature ?? midsceneDefaults.temperature,\n vl_high_resolution_images: true,\n },\n };\n};\n\nconst qwen3Adapter: ModelAdapterDefinition = {\n chatCompletion: {\n unsupportedUserConfig: ['reasoningEffort'],\n buildChatCompletionParams: buildQwenChatCompletionParams,\n },\n locate: {\n resultAdapter: {\n coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },\n },\n },\n};\n\nexport const qwenAdapters = {\n 'qwen2.5-vl': {\n chatCompletion: {\n unsupportedUserConfig: [\n 'reasoningEnabled',\n 'reasoningEffort',\n 'reasoningBudget',\n ],\n buildChatCompletionParams: buildQwen25ChatCompletionParams,\n },\n imagePreprocess: {\n padBlockSize: 28,\n },\n locate: {\n resultAdapter: {\n coordinates: { shape: 'bbox', order: 'xy' },\n parseRawLocateValue: parseQwen25RawLocateValue,\n mapLocateResultToPixelBbox: normalizeQwen25ResultToPixelBbox,\n },\n },\n },\n 'qwen3-vl': qwen3Adapter,\n qwen3: qwen3Adapter,\n 'qwen3.5': qwen3Adapter,\n 'qwen3.6': qwen3Adapter,\n} satisfies Pick<\n Record<TModelFamily, ModelAdapterDefinition>,\n 'qwen2.5-vl' | 'qwen3-vl' | 'qwen3' | 'qwen3.5' | 
'qwen3.6'\n>;\n"],"names":["defaultBboxSize","topLeftPointToPixelBbox","x","y","Math","parseQwen25RawLocateValue","input","bbox","unwrapCoordinateListLikeInput","msg","JSON","Error","normalizeQwen25ResultToPixelBbox","result","buildQwenChatCompletionParams","midsceneDefaults","userConfig","reasoningEnabled","reasoningBudget","effectiveReasoningEnabled","config","undefined","buildQwen25ChatCompletionParams","qwen3Adapter","qwenAdapters"],"mappings":";AAYA,MAAMA,kBAAkB;AAExB,SAASC,wBAAwBC,CAAS,EAAEC,CAAS;IACnD,OAAO;QACLC,KAAK,KAAK,CAACF;QACXE,KAAK,KAAK,CAACD;QACXC,KAAK,KAAK,CAACF,IAAIF;QACfI,KAAK,KAAK,CAACD,IAAIH;KAChB;AACH;AAEA,SAASK,0BAA0BC,KAAc;IAC/C,MAAMC,OAAOC,8BAA8BF;IAC3C,IAAIC,KAAK,MAAM,GAAG,GAAG;QACnB,MAAME,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACH,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAII,MAAMF;IAClB;IAEA,IAAI,AAAmB,YAAnB,OAAOF,IAAI,CAAC,EAAE,IAAiB,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EAC/C,OAAO;QACL,MAAM;QACN,aAAa;YAACA,IAAI,CAAC,EAAE;YAAEA,IAAI,CAAC,EAAE;YAAEA,IAAI,CAAC,EAAE;YAAEA,IAAI,CAAC,EAAE;SAAC;IACnD;IAGF,OAAO;QAAE,MAAM;QAAS,aAAa;YAACA,IAAI,CAAC,EAAE;YAAEA,IAAI,CAAC,EAAE;SAAC;IAAC;AAC1D;AAEA,SAASK,iCACPC,MAAyB;IAEzB,IAAIA,AAAgB,WAAhBA,OAAO,IAAI,EACb,OAAO;QACLT,KAAK,KAAK,CAACS,OAAO,WAAW,CAAC,EAAE;QAChCT,KAAK,KAAK,CAACS,OAAO,WAAW,CAAC,EAAE;QAChCT,KAAK,KAAK,CAACS,OAAO,WAAW,CAAC,EAAE;QAChCT,KAAK,KAAK,CAACS,OAAO,WAAW,CAAC,EAAE;KACjC;IAGH,OAAOZ,wBAAwBY,OAAO,WAAW,CAAC,EAAE,EAAEA,OAAO,WAAW,CAAC,EAAE;AAC7E;AAEA,MAAMC,gCAAgC,CACpCR;IAEA,MAAM,EAAES,gBAAgB,EAAEC,UAAU,EAAE,GAAGV;IACzC,MAAM,EAAEW,gBAAgB,EAAEC,eAAe,EAAE,GAAGF;IAC9C,MAAMG,4BAA4BF,oBAAoB;IACtD,MAAMG,SAAkC;QACtC,aAAaJ,WAAW,WAAW,IAAID,iBAAiB,WAAW;QACnE,iBAAiBI;IACnB;IAEA,IAAID,AAAoBG,WAApBH,iBACFE,OAAO,eAAe,GAAGF;IAG3B,OAAO;QAAEE;IAAO;AAClB;AAEA,MAAME,kCAAkC,CACtChB;IAEA,MAAM,EAAES,gBAAgB,EAAEC,UAAU,EAAE,GAAGV;IACzC,OAAO;QACL,QAAQ;YACN,aAAaU,WAAW,WAAW,IAAID,iBAAiB,WAAW;YACnE,2BAA2B;QAC7B;IACF;AACF;AAEA,MAAMQ,eAAuC;IAC3C,gBAAgB;QACd,uBAAuB;YAAC;SAAkB;QAC1C,2BAA2BT;IAC7B;IACA,QAAQ;QACN,eAAe;YACb,aAAa;gBAAE,OAAO;gBAAQ,OAA
O;gBAAM,cAAc;YAAK;QAChE;IACF;AACF;AAEO,MAAMU,eAAe;IAC1B,cAAc;QACZ,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;aACD;YACD,2BAA2BF;QAC7B;QACA,iBAAiB;YACf,cAAc;QAChB;QACA,QAAQ;YACN,eAAe;gBACb,aAAa;oBAAE,OAAO;oBAAQ,OAAO;gBAAK;gBAC1C,qBAAqBjB;gBACrB,4BAA4BO;YAC9B;QACF;IACF;IACA,YAAYW;IACZ,OAAOA;IACP,WAAWA;IACX,WAAWA;AACb"}
@@ -0,0 +1,45 @@
1
+ import { getDebug } from "@midscene/shared/logger";
2
+ import { autoGlmAdapters } from "./auto-glm/adapter.mjs";
3
+ import { defaultOpenAICompatibleAdapterConfig } from "./default.mjs";
4
+ import { doubaoAdapters } from "./doubao.mjs";
5
+ import { geminiAdapters } from "./gemini.mjs";
6
+ import { glmAdapters } from "./glm.mjs";
7
+ import { gptAdapters } from "./gpt.mjs";
8
+ import { qwenAdapters } from "./qwen.mjs";
9
+ import { ResolvedModelAdapter } from "./resolved.mjs";
10
+ import { uiTarsAdapters } from "./ui-tars/adapter.mjs";
11
// Debug logger for adapter resolution, under the 'ai:model-adapter' namespace.
const debugModelAdapter = getDebug('ai:model-adapter');

// Resolved adapters, keyed by model family name (or 'default' when none is given).
const modelAdapterCache = new Map();

/**
 * Registry of adapter definitions keyed by model family.
 *
 * Built by spreading each vendor's adapter map in a fixed order; if two maps
 * ever shared a key, the later spread would win.
 */
const MODEL_ADAPTER_CONFIGS = {
  ...qwenAdapters,
  ...doubaoAdapters,
  ...geminiAdapters,
  ...uiTarsAdapters,
  ...glmAdapters,
  ...autoGlmAdapters,
  ...gptAdapters,
};
22
/**
 * Emit a debug line listing the user-config options an adapter does not support.
 *
 * Nothing is logged when the adapter's `unsupportedUserConfig` list is empty.
 *
 * @param {string} modelFamily - Name used to label the adapter in the message.
 * @param {{chatCompletion: {unsupportedUserConfig: string[]}}} adapter - Resolved adapter.
 * @returns {void}
 */
function debugAdapterUnsupportedUserConfig(modelFamily, adapter) {
  const { unsupportedUserConfig } = adapter.chatCompletion;

  if (unsupportedUserConfig.length !== 0) {
    const serialized = JSON.stringify(unsupportedUserConfig);
    debugModelAdapter(`model adapter "${modelFamily}" unsupportedUserConfig: ${serialized}`);
  }
}
26
/**
 * Return the resolved adapter for a model family, creating and caching it on
 * first use.
 *
 * A falsy `modelFamily` selects `defaultOpenAICompatibleAdapterConfig`; when it
 * is `undefined`/`null` the adapter is cached under the key `'default'`.
 *
 * @param {string} [modelFamily] - Model family name registered in
 *   `MODEL_ADAPTER_CONFIGS`.
 * @returns {object} The cached or newly created `ResolvedModelAdapter`.
 * @throws {Error} When `modelFamily` is given but is not registered.
 */
function getModelAdapter(modelFamily) {
  // Only own keys count as registered families. A plain bracket lookup also
  // matches inherited Object.prototype members ("constructor", "toString", ...)
  // and would hand a function to ResolvedModelAdapter instead of failing.
  const isRegistered =
    Boolean(modelFamily) &&
    Object.prototype.hasOwnProperty.call(MODEL_ADAPTER_CONFIGS, modelFamily);

  // Validate before touching the cache so an unregistered name that happens to
  // equal a cache key (e.g. 'default') fails consistently, regardless of
  // whether the fallback adapter was created earlier.
  if (modelFamily && !isRegistered) {
    throw new Error(`No model adapter registered for modelFamily: ${modelFamily}`);
  }

  const cacheKey = modelFamily ?? 'default';
  const cached = modelAdapterCache.get(cacheKey);
  if (cached) {
    return cached;
  }

  const config = isRegistered
    ? MODEL_ADAPTER_CONFIGS[modelFamily]
    : defaultOpenAICompatibleAdapterConfig;
  if (!config) {
    // A registered key with an empty definition is still unusable.
    throw new Error(`No model adapter registered for modelFamily: ${modelFamily}`);
  }

  const adapter = new ResolvedModelAdapter(config, cacheKey);
  modelAdapterCache.set(cacheKey, adapter);
  debugAdapterUnsupportedUserConfig(cacheKey, adapter);
  return adapter;
}
37
/**
 * Pair a model config with the adapter resolved for its `modelFamily`.
 *
 * @param {{modelFamily?: string}} config - Model configuration.
 * @returns {{config: object, adapter: object}} The same config object plus the
 *   adapter returned by `getModelAdapter(config.modelFamily)`.
 */
function getModelRuntime(config) {
  const adapter = getModelAdapter(config.modelFamily);
  return { config, adapter };
}
43
+ export { MODEL_ADAPTER_CONFIGS, getModelAdapter, getModelRuntime };
44
+
45
+ //# sourceMappingURL=registry.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/registry.mjs","sources":["../../../../src/ai-model/models/registry.ts"],"sourcesContent":["import type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { autoGlmAdapters } from './auto-glm/adapter';\nimport { defaultOpenAICompatibleAdapterConfig } from './default';\nimport { doubaoAdapters } from './doubao';\nimport { geminiAdapters } from './gemini';\nimport { glmAdapters } from './glm';\nimport { gptAdapters } from './gpt';\nimport { qwenAdapters } from './qwen';\nimport { ResolvedModelAdapter } from './resolved';\nimport type {\n ModelAdapter,\n ModelAdapterDefinition,\n ModelRuntime,\n} from './types';\nimport { uiTarsAdapters } from './ui-tars/adapter';\n\nexport const MODEL_ADAPTER_CONFIGS = {\n ...qwenAdapters,\n ...doubaoAdapters,\n ...geminiAdapters,\n ...uiTarsAdapters,\n ...glmAdapters,\n ...autoGlmAdapters,\n ...gptAdapters,\n} satisfies Record<TModelFamily, ModelAdapterDefinition>;\n\ntype ModelAdapterCacheKey = TModelFamily | 'default';\n\nconst modelAdapterCache = new Map<ModelAdapterCacheKey, ModelAdapter>();\nconst debugModelAdapter = getDebug('ai:model-adapter');\n\nfunction debugAdapterUnsupportedUserConfig(\n modelFamily: ModelAdapterCacheKey,\n adapter: ModelAdapter,\n): void {\n if (adapter.chatCompletion.unsupportedUserConfig.length === 0) {\n return;\n }\n\n debugModelAdapter(\n `model adapter \"${modelFamily}\" unsupportedUserConfig: ${JSON.stringify(\n adapter.chatCompletion.unsupportedUserConfig,\n )}`,\n );\n}\n\nexport function getModelAdapter(modelFamily?: TModelFamily): ModelAdapter {\n const cacheKey: ModelAdapterCacheKey = modelFamily ?? 'default';\n let adapter = modelAdapterCache.get(cacheKey);\n if (adapter) {\n return adapter;\n }\n\n const config = modelFamily\n ? 
MODEL_ADAPTER_CONFIGS[modelFamily]\n : defaultOpenAICompatibleAdapterConfig;\n if (!config) {\n throw new Error(\n `No model adapter registered for modelFamily: ${modelFamily}`,\n );\n }\n\n adapter = new ResolvedModelAdapter(config, cacheKey);\n modelAdapterCache.set(cacheKey, adapter);\n debugAdapterUnsupportedUserConfig(cacheKey, adapter);\n\n return adapter;\n}\n\nexport function getModelRuntime(config: IModelConfig): ModelRuntime {\n return {\n config,\n adapter: getModelAdapter(config.modelFamily),\n };\n}\n"],"names":["MODEL_ADAPTER_CONFIGS","qwenAdapters","doubaoAdapters","geminiAdapters","uiTarsAdapters","glmAdapters","autoGlmAdapters","gptAdapters","modelAdapterCache","Map","debugModelAdapter","getDebug","debugAdapterUnsupportedUserConfig","modelFamily","adapter","JSON","getModelAdapter","cacheKey","config","defaultOpenAICompatibleAdapterConfig","Error","ResolvedModelAdapter","getModelRuntime"],"mappings":";;;;;;;;;;AAiBO,MAAMA,wBAAwB;IACnC,GAAGC,YAAY;IACf,GAAGC,cAAc;IACjB,GAAGC,cAAc;IACjB,GAAGC,cAAc;IACjB,GAAGC,WAAW;IACd,GAAGC,eAAe;IAClB,GAAGC,WAAW;AAChB;AAIA,MAAMC,oBAAoB,IAAIC;AAC9B,MAAMC,oBAAoBC,SAAS;AAEnC,SAASC,kCACPC,WAAiC,EACjCC,OAAqB;IAErB,IAAIA,AAAwD,MAAxDA,QAAQ,cAAc,CAAC,qBAAqB,CAAC,MAAM,EACrD;IAGFJ,kBACE,CAAC,eAAe,EAAEG,YAAY,yBAAyB,EAAEE,KAAK,SAAS,CACrED,QAAQ,cAAc,CAAC,qBAAqB,GAC3C;AAEP;AAEO,SAASE,gBAAgBH,WAA0B;IACxD,MAAMI,WAAiCJ,eAAe;IACtD,IAAIC,UAAUN,kBAAkB,GAAG,CAACS;IACpC,IAAIH,SACF,OAAOA;IAGT,MAAMI,SAASL,cACXb,qBAAqB,CAACa,YAAY,GAClCM;IACJ,IAAI,CAACD,QACH,MAAM,IAAIE,MACR,CAAC,6CAA6C,EAAEP,aAAa;IAIjEC,UAAU,IAAIO,qBAAqBH,QAAQD;IAC3CT,kBAAkB,GAAG,CAACS,UAAUH;IAChCF,kCAAkCK,UAAUH;IAE5C,OAAOA;AACT;AAEO,SAASQ,gBAAgBJ,MAAoB;IAClD,OAAO;QACLA;QACA,SAASF,gBAAgBE,OAAO,WAAW;IAC7C;AACF"}
@@ -0,0 +1,104 @@
1
+ import { normalJsonParser } from "../service-caller/json.mjs";
2
+ import { createLocateResultAdapter } from "../shared/model-locate-result/factory.mjs";
3
/**
 * Compiler-emitted helper that sets `obj[key]` to `value`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined as an own, enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @param {object} obj - Target object.
 * @param {PropertyKey} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`, for chaining.
 */
function _define_property(obj, key, value) {
    const alreadyReachable = key in obj;
    if (!alreadyReachable) {
        obj[key] = value;
        return obj;
    }
    // Redefine instead of assigning so an inherited or non-writable member is overridden.
    const descriptor = {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
13
// Replanning cycle limit used when an adapter definition does not supply one.
const defaultReplanningCycleLimit = 20;

// Default image-detail resolver: always yields `undefined`.
const defaultImageDetail = (_input)=>{
    return undefined;
};

// Default chat-completion params: the user's temperature when set, otherwise the Midscene default.
const defaultChatCompletionParams = ({ midsceneDefaults, userConfig })=>{
    const temperature = userConfig.temperature ?? midsceneDefaults.temperature;
    return {
        config: {
            temperature
        }
    };
};

// Defaults handed to every chat-completion builder as `midsceneDefaults`.
const midsceneChatCompletionDefaults = {
    temperature: 0
};

// Locate result adapter definition used when the adapter definition provides none.
const defaultLocateResultAdapterDefinition = {
    coordinates: {
        shape: 'bbox',
        order: 'xy',
        normalizedBy: 1000
    }
};
30
/**
 * Resolves the `jsonParser` entry of an adapter definition to a parser function.
 *
 * @param {Function|string|undefined} jsonParser - A custom parser function, the
 *   `'lenient-json'` preset name, or a falsy value.
 * @returns {Function} `normalJsonParser` for a falsy value or `'lenient-json'`;
 *   the given function otherwise.
 * @throws {Error} When `jsonParser` is neither of the above.
 */
function resolveJsonParser(jsonParser) {
    const wantsDefaultParser = !jsonParser || jsonParser === 'lenient-json';
    if (wantsDefaultParser) {
        return normalJsonParser;
    }
    if (typeof jsonParser !== 'function') {
        throw new Error(`Unknown json parser preset: ${jsonParser}`);
    }
    return jsonParser;
}
35
/**
 * Builds the resolved chat-completion adapter from an optional definition.
 *
 * Missing members fall back to `defaultChatCompletionParams`,
 * `defaultImageDetail` and an empty `unsupportedUserConfig` list. Both returned
 * callbacks receive the caller's input extended with a non-nullish `userConfig`
 * and the shared `midsceneDefaults`.
 *
 * @param {object|undefined} chatCompletion - The `chatCompletion` section of an adapter definition.
 * @returns {{unsupportedUserConfig: Array, buildChatCompletionParams: Function, resolveImageDetail: Function}}
 */
function resolveChatCompletion(chatCompletion) {
    const paramsBuilder = chatCompletion?.buildChatCompletionParams ?? defaultChatCompletionParams;
    const imageDetailResolver = chatCompletion?.resolveImageDetail ?? defaultImageDetail;
    const unsupportedUserConfig = chatCompletion?.unsupportedUserConfig ?? [];

    // Same context shape for both callbacks: input + normalized userConfig + defaults.
    const toCallContext = (input)=>({
            ...input,
            userConfig: input.userConfig ?? {},
            midsceneDefaults: midsceneChatCompletionDefaults
        });

    return {
        unsupportedUserConfig,
        buildChatCompletionParams: (input)=>paramsBuilder(toCallContext(input)),
        resolveImageDetail: (input)=>imageDetailResolver(toCallContext(input))
    };
}
56
/**
 * Resolves the image preprocessing policy of an adapter definition.
 *
 * @param {object|undefined} imagePreprocess - The `imagePreprocess` section, if any.
 * @returns {{padBlockSize: *}} Policy carrying the definition's `padBlockSize`
 *   (`undefined` when the section or the field is absent).
 */
function resolveImagePreprocess(imagePreprocess) {
    const padBlockSize = imagePreprocess?.padBlockSize;
    return {
        padBlockSize
    };
}
61
/**
 * Resolves the planning section of an adapter definition, filling in defaults.
 *
 * Defaults: `cacheEnabled` is `true`, `defaultReplanningCycleLimit` is the
 * module-level default, and `supportsActionDeepLocate` is `false` for
 * `kind: 'custom'` and `true` otherwise. A custom planner additionally carries
 * its `planFn`.
 *
 * @param {object|undefined} planning - The `planning` section, if any.
 * @returns {object} The resolved planning adapter (`kind` is `'custom'` or `'standard'`).
 */
function resolvePlanning(planning) {
    const isCustom = planning?.kind === 'custom';
    const resolved = {
        kind: isCustom ? 'custom' : 'standard',
        cacheEnabled: planning?.cacheEnabled ?? true,
        defaultReplanningCycleLimit: planning?.defaultReplanningCycleLimit ?? defaultReplanningCycleLimit,
        // Deep locate is on by default only for the standard planner.
        supportsActionDeepLocate: planning?.supportsActionDeepLocate ?? !isCustom
    };
    if (isCustom) {
        resolved.planFn = planning.planFn;
    }
    return resolved;
}
76
/**
 * Resolves the locate section of an adapter definition.
 *
 * A `kind: 'custom'` section keeps its `locateFn` and defaults
 * `supportsSearchArea` to `false`. Any other input yields a standard adapter
 * whose `supportsSearchArea` defaults to `true` and whose `resultAdapter` is
 * created from the section's `resultAdapter` definition or the module default.
 *
 * @param {object|undefined} locate - The `locate` section, if any.
 * @returns {object} The resolved locate adapter.
 */
function resolveLocate(locate) {
    if (locate?.kind !== 'custom') {
        const supportsSearchArea = locate?.supportsSearchArea ?? true;
        const adapterDefinition = locate?.resultAdapter ?? defaultLocateResultAdapterDefinition;
        return {
            kind: 'standard',
            supportsSearchArea,
            resultAdapter: createLocateResultAdapter(adapterDefinition)
        };
    }
    return {
        kind: 'custom',
        supportsSearchArea: locate.supportsSearchArea ?? false,
        locateFn: locate.locateFn
    };
}
88
/**
 * A model adapter whose optional definition sections have all been resolved to
 * concrete values: `jsonParser`, `chatCompletion`, `imagePreprocess`,
 * `planning` and `locate`.
 */
class ResolvedModelAdapter {
    /**
     * @param {object} config - The adapter definition to resolve.
     * @param {string} modelFamily - Accepted for the caller's benefit; not read by this constructor.
     */
    constructor(config, modelFamily){
        // Declare every field up front (in this order) before any resolver runs.
        const fieldNames = [
            "jsonParser",
            "chatCompletion",
            "imagePreprocess",
            "planning",
            "locate"
        ];
        for (const fieldName of fieldNames){
            _define_property(this, fieldName, void 0);
        }
        this.jsonParser = resolveJsonParser(config.jsonParser);
        this.chatCompletion = resolveChatCompletion(config.chatCompletion);
        this.imagePreprocess = resolveImagePreprocess(config.imagePreprocess);
        this.planning = resolvePlanning(config.planning);
        this.locate = resolveLocate(config.locate);
    }
}
102
+ export { ResolvedModelAdapter };
103
+
104
+ //# sourceMappingURL=resolved.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/resolved.mjs","sources":["../../../../src/ai-model/models/resolved.ts"],"sourcesContent":["import { normalJsonParser } from '../service-caller/json';\nimport { createLocateResultAdapter } from '../shared/model-locate-result/factory';\nimport type { LocateResultAdapterDefinition } from '../shared/model-locate-result/types';\nimport type {\n ChatCompletionAdapter,\n ChatCompletionCallContext,\n ChatCompletionCallInput,\n ImagePreprocessPolicy,\n JsonParser,\n LocateAdapter,\n MidsceneChatCompletionDefaults,\n ModelAdapter,\n ModelAdapterDefinition,\n PlanningAdapter,\n} from './types';\n\nconst defaultReplanningCycleLimit = 20;\n\nconst defaultImageDetail = (_input: unknown) => undefined;\n\nconst defaultChatCompletionParams = ({\n midsceneDefaults,\n userConfig,\n}: ChatCompletionCallContext) => ({\n config: {\n temperature: userConfig.temperature ?? midsceneDefaults.temperature,\n },\n});\n\nconst midsceneChatCompletionDefaults: MidsceneChatCompletionDefaults = {\n temperature: 0,\n};\n\nconst defaultLocateResultAdapterDefinition: LocateResultAdapterDefinition = {\n coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },\n};\n\nfunction resolveJsonParser(\n jsonParser: ModelAdapterDefinition['jsonParser'],\n): JsonParser {\n if (!jsonParser || jsonParser === 'lenient-json') {\n return normalJsonParser;\n }\n\n if (typeof jsonParser === 'function') {\n return jsonParser;\n }\n\n throw new Error(`Unknown json parser preset: ${jsonParser}`);\n}\n\nfunction resolveChatCompletion(\n chatCompletion: ModelAdapterDefinition['chatCompletion'],\n): ChatCompletionAdapter {\n const buildChatCompletionParams =\n chatCompletion?.buildChatCompletionParams ?? defaultChatCompletionParams;\n const resolveImageDetail =\n chatCompletion?.resolveImageDetail ?? defaultImageDetail;\n const unsupportedUserConfig = chatCompletion?.unsupportedUserConfig ?? 
[];\n\n return {\n unsupportedUserConfig,\n buildChatCompletionParams: (input) => {\n const context = {\n ...input,\n userConfig: input.userConfig ?? {},\n midsceneDefaults: midsceneChatCompletionDefaults,\n };\n return buildChatCompletionParams(context);\n },\n resolveImageDetail: (input) =>\n resolveImageDetail({\n ...input,\n userConfig: input.userConfig ?? {},\n midsceneDefaults: midsceneChatCompletionDefaults,\n }),\n };\n}\n\nfunction resolveImagePreprocess(\n imagePreprocess: ModelAdapterDefinition['imagePreprocess'],\n): ImagePreprocessPolicy {\n return {\n padBlockSize: imagePreprocess?.padBlockSize,\n };\n}\n\nfunction resolvePlanning(\n planning: ModelAdapterDefinition['planning'],\n): PlanningAdapter {\n if (planning?.kind === 'custom') {\n return {\n kind: 'custom',\n cacheEnabled: planning.cacheEnabled ?? true,\n defaultReplanningCycleLimit:\n planning.defaultReplanningCycleLimit ?? defaultReplanningCycleLimit,\n supportsActionDeepLocate: planning.supportsActionDeepLocate ?? false,\n planFn: planning.planFn,\n };\n }\n\n return {\n kind: 'standard',\n cacheEnabled: planning?.cacheEnabled ?? true,\n defaultReplanningCycleLimit:\n planning?.defaultReplanningCycleLimit ?? defaultReplanningCycleLimit,\n supportsActionDeepLocate: planning?.supportsActionDeepLocate ?? true,\n };\n}\n\nfunction resolveLocate(\n locate: ModelAdapterDefinition['locate'],\n): LocateAdapter {\n if (locate?.kind === 'custom') {\n return {\n kind: 'custom',\n supportsSearchArea: locate.supportsSearchArea ?? false,\n locateFn: locate.locateFn,\n };\n }\n\n return {\n kind: 'standard',\n supportsSearchArea: locate?.supportsSearchArea ?? true,\n resultAdapter: createLocateResultAdapter(\n locate?.resultAdapter ?? 
defaultLocateResultAdapterDefinition,\n ),\n };\n}\n\nexport class ResolvedModelAdapter implements ModelAdapter {\n readonly jsonParser: JsonParser;\n readonly chatCompletion: ChatCompletionAdapter;\n readonly imagePreprocess: ImagePreprocessPolicy;\n readonly planning: PlanningAdapter;\n readonly locate: LocateAdapter;\n\n constructor(config: ModelAdapterDefinition, modelFamily: string) {\n this.jsonParser = resolveJsonParser(config.jsonParser);\n this.chatCompletion = resolveChatCompletion(config.chatCompletion);\n this.imagePreprocess = resolveImagePreprocess(config.imagePreprocess);\n this.planning = resolvePlanning(config.planning);\n this.locate = resolveLocate(config.locate);\n }\n}\n"],"names":["defaultReplanningCycleLimit","defaultImageDetail","_input","undefined","defaultChatCompletionParams","midsceneDefaults","userConfig","midsceneChatCompletionDefaults","defaultLocateResultAdapterDefinition","resolveJsonParser","jsonParser","normalJsonParser","Error","resolveChatCompletion","chatCompletion","buildChatCompletionParams","resolveImageDetail","unsupportedUserConfig","input","context","resolveImagePreprocess","imagePreprocess","resolvePlanning","planning","resolveLocate","locate","createLocateResultAdapter","ResolvedModelAdapter","config","modelFamily"],"mappings":";;;;;;;;;;;;AAgBA,MAAMA,8BAA8B;AAEpC,MAAMC,qBAAqB,CAACC,SAAoBC;AAEhD,MAAMC,8BAA8B,CAAC,EACnCC,gBAAgB,EAChBC,UAAU,EACgB,GAAM;QAChC,QAAQ;YACN,aAAaA,WAAW,WAAW,IAAID,iBAAiB,WAAW;QACrE;IACF;AAEA,MAAME,iCAAiE;IACrE,aAAa;AACf;AAEA,MAAMC,uCAAsE;IAC1E,aAAa;QAAE,OAAO;QAAQ,OAAO;QAAM,cAAc;IAAK;AAChE;AAEA,SAASC,kBACPC,UAAgD;IAEhD,IAAI,CAACA,cAAcA,AAAe,mBAAfA,YACjB,OAAOC;IAGT,IAAI,AAAsB,cAAtB,OAAOD,YACT,OAAOA;IAGT,MAAM,IAAIE,MAAM,CAAC,4BAA4B,EAAEF,YAAY;AAC7D;AAEA,SAASG,sBACPC,cAAwD;IAExD,MAAMC,4BACJD,gBAAgB,6BAA6BV;IAC/C,MAAMY,qBACJF,gBAAgB,sBAAsBb;IACxC,MAAMgB,wBAAwBH,gBAAgB,yBAAyB,EAAE;IAEzE,OAAO;QACLG;QACA,2BAA2B,CAACC;YAC1B,MAAMC,UAAU;gBACd,GAAGD,KAAK;gBACR,YAAYA,MAAM,UAAU,IAAI,CAAC;gBACjC,kBAAkBX;YACpB;YAC
A,OAAOQ,0BAA0BI;QACnC;QACA,oBAAoB,CAACD,QACnBF,mBAAmB;gBACjB,GAAGE,KAAK;gBACR,YAAYA,MAAM,UAAU,IAAI,CAAC;gBACjC,kBAAkBX;YACpB;IACJ;AACF;AAEA,SAASa,uBACPC,eAA0D;IAE1D,OAAO;QACL,cAAcA,iBAAiB;IACjC;AACF;AAEA,SAASC,gBACPC,QAA4C;IAE5C,IAAIA,UAAU,SAAS,UACrB,OAAO;QACL,MAAM;QACN,cAAcA,SAAS,YAAY,IAAI;QACvC,6BACEA,SAAS,2BAA2B,IAAIvB;QAC1C,0BAA0BuB,SAAS,wBAAwB,IAAI;QAC/D,QAAQA,SAAS,MAAM;IACzB;IAGF,OAAO;QACL,MAAM;QACN,cAAcA,UAAU,gBAAgB;QACxC,6BACEA,UAAU,+BAA+BvB;QAC3C,0BAA0BuB,UAAU,4BAA4B;IAClE;AACF;AAEA,SAASC,cACPC,MAAwC;IAExC,IAAIA,QAAQ,SAAS,UACnB,OAAO;QACL,MAAM;QACN,oBAAoBA,OAAO,kBAAkB,IAAI;QACjD,UAAUA,OAAO,QAAQ;IAC3B;IAGF,OAAO;QACL,MAAM;QACN,oBAAoBA,QAAQ,sBAAsB;QAClD,eAAeC,0BACbD,QAAQ,iBAAiBjB;IAE7B;AACF;AAEO,MAAMmB;IAOX,YAAYC,MAA8B,EAAEC,WAAmB,CAAE;QANjE,uBAAS,cAAT;QACA,uBAAS,kBAAT;QACA,uBAAS,mBAAT;QACA,uBAAS,YAAT;QACA,uBAAS,UAAT;QAGE,IAAI,CAAC,UAAU,GAAGpB,kBAAkBmB,OAAO,UAAU;QACrD,IAAI,CAAC,cAAc,GAAGf,sBAAsBe,OAAO,cAAc;QACjE,IAAI,CAAC,eAAe,GAAGR,uBAAuBQ,OAAO,eAAe;QACpE,IAAI,CAAC,QAAQ,GAAGN,gBAAgBM,OAAO,QAAQ;QAC/C,IAAI,CAAC,MAAM,GAAGJ,cAAcI,OAAO,MAAM;IAC3C;AACF"}
File without changes
@@ -0,0 +1,142 @@
1
+ import { UITarsModelVersion } from "@midscene/shared/env";
2
+ import { assert } from "@midscene/shared/utils";
3
+ import { jsonrepair } from "jsonrepair";
4
+ import { extractJSONFromCodeBlock, safeParseJson } from "../../service-caller/json.mjs";
5
+ import { unwrapCoordinateListLikeInput } from "../../shared/model-locate-result/index.mjs";
6
+ import { uiTarsPlanning } from "./planning.mjs";
7
+ const defaultVlmUiTarsReplanningCycleLimit = 40;
8
/**
 * Recursively trims whitespace from the keys and string values of a parsed
 * JSON value and returns the normalized copy.
 *
 * - `null` / `undefined` are returned unchanged.
 * - Arrays are mapped element by element.
 * - Objects get a fresh object with trimmed keys; string values are trimmed
 *   unless their (trimmed) key is listed in `context.preserveStringValueKeys`.
 * - Bare strings are trimmed; every other primitive is returned as is.
 *
 * The result object is assembled with `Object.fromEntries`, so every key —
 * including `"__proto__"`, which `JSON.parse` can produce from model output —
 * becomes an own data property instead of going through the prototype setter.
 *
 * @param {*} obj - Parsed JSON value to normalize.
 * @param {{preserveStringValueKeys?: string[]}} [context={}] - Keys whose string values must not be trimmed.
 * @returns {*} The normalized value.
 */
function normalizeJsonObject(obj, context = {}) {
    if (obj == null) return obj;
    if (typeof obj === 'string') return obj.trim();
    if (Array.isArray(obj)) return obj.map((item)=>normalizeJsonObject(item, context));
    if (typeof obj !== 'object') return obj;

    const normalizedEntries = Object.entries(obj).map(([key, value])=>{
        const trimmedKey = key.trim();
        if (typeof value !== 'string') {
            return [
                trimmedKey,
                normalizeJsonObject(value, context)
            ];
        }
        const preserveStringValue = context.preserveStringValueKeys?.includes(trimmedKey) ?? false;
        return [
            trimmedKey,
            preserveStringValue ? value : value.trim()
        ];
    });
    // fromEntries defines own properties; plain assignment would treat "__proto__" as a prototype change.
    return Object.fromEntries(normalizedEntries);
}
23
/**
 * Tells whether a failed JSON parse from the given parser source should go
 * through the UI-TARS repair path.
 *
 * @param {string} source - Parser source tag.
 * @returns {boolean} `true` for `'locate'`, `'section-locator'` and `'planning-action-param'`.
 */
function shouldRepairUiTarsLocateJson(source) {
    switch(source){
        case 'locate':
        case 'section-locator':
        case 'planning-action-param':
            return true;
        default:
            return false;
    }
}
26
/**
 * Prepares raw UI-TARS locate text for JSON repair: when the text mentions
 * `bbox`, every run of whitespace that sits directly between two digits is
 * replaced by a single comma (e.g. `1 2 3 4` becomes `1,2,3,4`).
 * Text without `bbox` is returned untouched.
 *
 * @param {string} input - Raw text extracted from the model response.
 * @returns {string} The text with digit-separating whitespace turned into commas.
 */
function preprocessUiTarsLocateJson(input) {
    if (!input.includes('bbox')) {
        return input;
    }
    // The lookahead leaves the following digit unconsumed, so one pass handles chains like "1 2 3 4".
    return input.replace(/(\d)\s+(?=\d)/g, '$1,');
}
30
/**
 * JSON parser used by the UI-TARS adapters.
 *
 * First delegates to `safeParseJson(raw, context)`. If that throws and the
 * context's `source` is one of the locate-related sources (see
 * `shouldRepairUiTarsLocateJson`), a second attempt is made: the JSON is
 * extracted from the code block, whitespace-separated numbers are comma-joined,
 * the text is passed through `jsonrepair`, parsed, and normalized.
 *
 * @param {string} raw - Raw model response.
 * @param {{source: string, preserveStringValueKeys?: string[]}} [context] - Parser context; defaults to `{ source: 'generic-object' }`.
 * @returns {*} The parsed value.
 * @throws The first parse error for non-locate sources; otherwise an `Error`
 *   describing the repair failure, with the underlying error attached as `cause`.
 */
const uiTarsJsonParser = (raw, context = {
    source: 'generic-object'
})=>{
    const { source } = context;
    try {
        return safeParseJson(raw, context);
    } catch (firstError) {
        // Only locate-style payloads get the lenient repair pass.
        if (!shouldRepairUiTarsLocateJson(source)) throw firstError;
        const jsonString = preprocessUiTarsLocateJson(extractJSONFromCodeBlock(raw));
        try {
            return normalizeJsonObject(JSON.parse(jsonrepair(jsonString)), context);
        } catch (error) {
            // Keep the original message text, but preserve the underlying error (and its stack) as the cause.
            throw new Error(`failed to parse LLM response into JSON. Error - ${String(error ?? firstError ?? 'unknown error')}. Response - \n ${raw}`, {
                cause: error ?? firstError
            });
        }
    }
};
46
/**
 * Converts a raw UI-TARS locate value into a `{ type, coordinates }` result.
 *
 * The input is first passed through `unwrapCoordinateListLikeInput`
 * (NOTE(review): presumably strips wrapper objects/nesting — confirm against
 * its implementation). The unwrapped value is then handled as follows:
 *
 * - String: must be four whitespace-separated integers once trimmed; yields a
 *   `bbox` of those four numbers. The numbers are taken from the same trimmed
 *   match that is validated, so surrounding whitespace or a non-space
 *   separator accepted by the pattern no longer causes a late rejection.
 * - Array whose first element is a string: each `"x,y"` or `"x y"` item
 *   contributes two numbers, any other item contributes `Number(item)`.
 * - Anything else is used as the number list directly.
 *
 * The flattened list is interpreted by length: 4 or 5 → `bbox` from the first
 * four values; 2, 3, 6 or 7 → `point` from the first two; 8 → `bbox` from
 * indices 0, 1, 4, 5 (presumably four corner points — confirm). The 8-value
 * check uses the flattened list like the other checks, so four `"x,y"`
 * strings are accepted.
 *
 * @param {*} input - Raw locate value from the model response.
 * @returns {{type: 'bbox'|'point', coordinates: number[]}} The parsed locate value.
 * @throws {Error} When the value matches none of the supported shapes
 *   (including a null/undefined unwrapped value).
 */
function parseUiTarsRawLocateValue(input) {
    const bbox = unwrapCoordinateListLikeInput(input);

    if (typeof bbox === 'string') {
        const invalidStringMessage = `invalid bbox data string for ui-tars mode: ${bbox}`;
        const matched = /^(\d+)\s(\d+)\s(\d+)\s(\d+)$/.exec(bbox.trim());
        assert(matched !== null, invalidStringMessage);
        // Defensive: do not rely on the assert helper to stop execution.
        if (matched === null) throw new Error(invalidStringMessage);
        return {
            type: 'bbox',
            coordinates: matched.slice(1, 5).map((digits)=>Number(digits))
        };
    }

    // Built lazily: JSON.stringify is only needed on the failure path.
    const invalidDataError = ()=>new Error(`invalid bbox data for ui-tars mode: ${JSON.stringify(bbox)} `);

    // Without this guard a null/undefined value would crash on `.length` below.
    if (bbox == null) throw invalidDataError();

    let bboxList = bbox;
    if (Array.isArray(bbox) && typeof bbox[0] === 'string') {
        bboxList = [];
        bbox.forEach((item)=>{
            let separator = null;
            if (typeof item === 'string') {
                if (item.includes(',')) separator = ',';
                else if (item.includes(' ')) separator = ' ';
            }
            if (separator === null) {
                bboxList.push(Number(item));
                return;
            }
            const [x, y] = item.split(separator);
            bboxList.push(Number(x.trim()), Number(y.trim()));
        });
    }

    switch(bboxList.length){
        case 4:
        case 5:
            return {
                type: 'bbox',
                coordinates: [
                    bboxList[0],
                    bboxList[1],
                    bboxList[2],
                    bboxList[3]
                ]
            };
        case 2:
        case 3:
        case 6:
        case 7:
            return {
                type: 'point',
                coordinates: [
                    bboxList[0],
                    bboxList[1]
                ]
            };
        case 8:
            return {
                type: 'bbox',
                coordinates: [
                    bboxList[0],
                    bboxList[1],
                    bboxList[4],
                    bboxList[5]
                ]
            };
        default:
            throw invalidDataError();
    }
}
101
/**
 * Creates the adapter definition for one UI-TARS model version.
 *
 * The definition uses the UI-TARS JSON parser, marks the reasoning-related
 * user options as unsupported, plans through `uiTarsPlanning` (custom planner,
 * cache disabled, UI-TARS-specific replanning limit) and parses locate results
 * with `parseUiTarsRawLocateValue`.
 *
 * @param {*} uiTarsModelVersion - Version value forwarded to `uiTarsPlanning` as its third argument.
 * @returns {object} A fresh adapter definition.
 */
function createUiTarsAdapter(uiTarsModelVersion) {
    const chatCompletion = {
        unsupportedUserConfig: [
            'reasoningEnabled',
            'reasoningEffort',
            'reasoningBudget'
        ],
        buildChatCompletionParams: ({ midsceneDefaults, userConfig })=>{
            const temperature = userConfig.temperature ?? midsceneDefaults.temperature;
            return {
                config: {
                    temperature
                }
            };
        }
    };

    const planning = {
        kind: 'custom',
        cacheEnabled: false,
        defaultReplanningCycleLimit: defaultVlmUiTarsReplanningCycleLimit,
        // Bind the model version so the planner keeps the two-argument planFn shape.
        planFn: (userInstruction, options)=>uiTarsPlanning(userInstruction, options, uiTarsModelVersion)
    };

    const locate = {
        resultAdapter: {
            coordinates: {
                shape: 'bbox',
                order: 'xy',
                normalizedBy: 1000
            },
            parseRawLocateValue: parseUiTarsRawLocateValue
        }
    };

    return {
        jsonParser: uiTarsJsonParser,
        chatCompletion,
        planning,
        locate
    };
}
134
// One adapter definition is shared by both Doubao 1.5 model-family keys.
const uiTarsDoubao15Adapter = createUiTarsAdapter(UITarsModelVersion.DOUBAO_1_5_20B);
const uiTarsV1Adapter = createUiTarsAdapter(UITarsModelVersion.V1_0);

// UI-TARS adapter definitions keyed by model family name.
const uiTarsAdapters = {
    'vlm-ui-tars': uiTarsV1Adapter,
    'vlm-ui-tars-doubao': uiTarsDoubao15Adapter,
    'vlm-ui-tars-doubao-1.5': uiTarsDoubao15Adapter
};
140
+ export { uiTarsAdapters };
141
+
142
+ //# sourceMappingURL=adapter.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/models/ui-tars/adapter.mjs","sources":["../../../../../src/ai-model/models/ui-tars/adapter.ts"],"sourcesContent":["import { type TModelFamily, UITarsModelVersion } from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport {\n extractJSONFromCodeBlock,\n safeParseJson,\n} from '../../service-caller/json';\nimport {\n type LocateResultValue,\n unwrapCoordinateListLikeInput,\n} from '../../shared/model-locate-result';\nimport type {\n JsonParserContext,\n JsonParserSource,\n ModelAdapterDefinition,\n} from '../types';\nimport { uiTarsPlanning } from './planning';\n\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nfunction normalizeJsonObject(\n obj: any,\n context: Pick<JsonParserContext, 'preserveStringValueKeys'> = {},\n): any {\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item, context));\n }\n\n if (typeof obj === 'object') {\n const normalized: any = {};\n for (const [key, value] of Object.entries(obj)) {\n const trimmedKey = key.trim();\n const preserveStringValue =\n context.preserveStringValueKeys?.includes(trimmedKey) ?? false;\n const normalizedValue =\n typeof value === 'string'\n ? preserveStringValue\n ? value\n : value.trim()\n : normalizeJsonObject(value, context);\n normalized[trimmedKey] = normalizedValue;\n }\n return normalized;\n }\n\n return typeof obj === 'string' ? 
obj.trim() : obj;\n}\n\nfunction shouldRepairUiTarsLocateJson(source: JsonParserSource) {\n return (\n source === 'locate' ||\n source === 'section-locator' ||\n source === 'planning-action-param'\n );\n}\n\nfunction preprocessUiTarsLocateJson(input: string) {\n if (input.includes('bbox')) {\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nconst uiTarsJsonParser: ModelAdapterDefinition['jsonParser'] = (\n raw,\n context = { source: 'generic-object' },\n) => {\n const { source } = context;\n try {\n return safeParseJson(raw, context);\n } catch (firstError) {\n if (!shouldRepairUiTarsLocateJson(source)) {\n throw firstError;\n }\n\n const jsonString = preprocessUiTarsLocateJson(\n extractJSONFromCodeBlock(raw),\n );\n try {\n return normalizeJsonObject(JSON.parse(jsonrepair(jsonString)), context);\n } catch (error) {\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n error ?? firstError ?? 'unknown error',\n )}. Response - \\n ${raw}`,\n );\n }\n }\n};\n\n// UI-TARS has not received active updates for a long time, so this parser is\n// intentionally kept separate from Doubao even though the current logic is the\n// same. 
This avoids coupling UI-TARS behavior to future Doubao adapter changes.\nfunction parseUiTarsRawLocateValue(input: unknown): LocateResultValue {\n const bbox = unwrapCoordinateListLikeInput(input as any);\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for ui-tars mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return {\n type: 'bbox',\n coordinates: [\n Number(splitted[0]),\n Number(splitted[1]),\n Number(splitted[2]),\n Number(splitted[3]),\n ],\n };\n }\n throw new Error(`invalid bbox data string for ui-tars mode: ${bbox}`);\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as number[];\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return {\n type: 'bbox',\n coordinates: [bboxList[0], bboxList[1], bboxList[2], bboxList[3]],\n };\n }\n\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return { type: 'point', coordinates: [bboxList[0], bboxList[1]] };\n }\n\n if (bbox.length === 8) {\n return {\n type: 'bbox',\n coordinates: [bboxList[0], bboxList[1], bboxList[4], bboxList[5]],\n };\n }\n\n const msg = `invalid bbox data for ui-tars mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nfunction createUiTarsAdapter(\n uiTarsModelVersion: UITarsModelVersion,\n): ModelAdapterDefinition {\n return {\n jsonParser: uiTarsJsonParser,\n chatCompletion: {\n unsupportedUserConfig: [\n 'reasoningEnabled',\n 
'reasoningEffort',\n 'reasoningBudget',\n ],\n buildChatCompletionParams: ({ midsceneDefaults, userConfig }) => ({\n config: {\n temperature: userConfig.temperature ?? midsceneDefaults.temperature,\n },\n }),\n },\n planning: {\n kind: 'custom',\n cacheEnabled: false,\n defaultReplanningCycleLimit: defaultVlmUiTarsReplanningCycleLimit,\n planFn: (userInstruction, options) =>\n uiTarsPlanning(userInstruction, options, uiTarsModelVersion),\n },\n locate: {\n resultAdapter: {\n coordinates: { shape: 'bbox', order: 'xy', normalizedBy: 1000 },\n parseRawLocateValue: parseUiTarsRawLocateValue,\n },\n },\n };\n}\n\nconst uiTarsDoubao15Adapter = createUiTarsAdapter(\n UITarsModelVersion.DOUBAO_1_5_20B,\n);\n\nexport const uiTarsAdapters = {\n 'vlm-ui-tars': createUiTarsAdapter(UITarsModelVersion.V1_0),\n 'vlm-ui-tars-doubao': uiTarsDoubao15Adapter,\n 'vlm-ui-tars-doubao-1.5': uiTarsDoubao15Adapter,\n} satisfies Pick<\n Record<TModelFamily, ModelAdapterDefinition>,\n 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 
'vlm-ui-tars-doubao-1.5'\n>;\n"],"names":["defaultVlmUiTarsReplanningCycleLimit","normalizeJsonObject","obj","context","Array","item","normalized","key","value","Object","trimmedKey","preserveStringValue","normalizedValue","shouldRepairUiTarsLocateJson","source","preprocessUiTarsLocateJson","input","uiTarsJsonParser","raw","safeParseJson","firstError","jsonString","extractJSONFromCodeBlock","JSON","jsonrepair","error","Error","String","parseUiTarsRawLocateValue","bbox","unwrapCoordinateListLikeInput","assert","splitted","Number","bboxList","x","y","msg","createUiTarsAdapter","uiTarsModelVersion","midsceneDefaults","userConfig","userInstruction","options","uiTarsPlanning","uiTarsDoubao15Adapter","UITarsModelVersion","uiTarsAdapters"],"mappings":";;;;;;AAkBA,MAAMA,uCAAuC;AAE7C,SAASC,oBACPC,GAAQ,EACRC,UAA8D,CAAC,CAAC;IAEhE,IAAID,QAAAA,KACF,OAAOA;IAGT,IAAIE,MAAM,OAAO,CAACF,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACG,OAASJ,oBAAoBI,MAAMF;IAGrD,IAAI,AAAe,YAAf,OAAOD,KAAkB;QAC3B,MAAMI,aAAkB,CAAC;QACzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACP,KAAM;YAC9C,MAAMQ,aAAaH,IAAI,IAAI;YAC3B,MAAMI,sBACJR,QAAQ,uBAAuB,EAAE,SAASO,eAAe;YAC3D,MAAME,kBACJ,AAAiB,YAAjB,OAAOJ,QACHG,sBACEH,QACAA,MAAM,IAAI,KACZP,oBAAoBO,OAAOL;YACjCG,UAAU,CAACI,WAAW,GAAGE;QAC3B;QACA,OAAON;IACT;IAEA,OAAO,AAAe,YAAf,OAAOJ,MAAmBA,IAAI,IAAI,KAAKA;AAChD;AAEA,SAASW,6BAA6BC,MAAwB;IAC5D,OACEA,AAAW,aAAXA,UACAA,AAAW,sBAAXA,UACAA,AAAW,4BAAXA;AAEJ;AAEA,SAASC,2BAA2BC,KAAa;IAC/C,IAAIA,MAAM,QAAQ,CAAC,SACjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEA,MAAMC,mBAAyD,CAC7DC,KACAf,UAAU;IAAE,QAAQ;AAAiB,CAAC;IAEtC,MAAM,EAAEW,MAAM,EAAE,GAAGX;IACnB,IAAI;QACF,OAAOgB,cAAcD,KAAKf;IAC5B,EAAE,OAAOiB,YAAY;QACnB,IAAI,CAACP,6BAA6BC,SAChC,MAAMM;QAGR,MAAMC,aAAaN,2BACjBO,yBAAyBJ;QAE3B,IAAI;YACF,OAAOjB,oBAAoBsB,KAAK,KAAK,CAACC,WAAWH,cAAclB;QACjE,EAAE,OAAOsB,OAAO;YACd,MAAMC,MACJ,CAAC,gDAAgD,EAAEC,OACjDF,SAASL,cAAc,iBACvB,gBAAgB,EAAEF,KAAK;QAE7B;IACF;AACF;AAKA,SAASU,0BAA0BZ,KAAc;IAC/C,MAAMa,OAAOC,8BAA8Bd;IAC3C,IAAI,AAAgB,YA
AhB,OAAOa,MAAmB;QAC5BE,OACE,+BAA+B,IAAI,CAACF,KAAK,IAAI,KAC7C,CAAC,2CAA2C,EAAEA,MAAM;QAEtD,MAAMG,WAAWH,KAAK,KAAK,CAAC;QAC5B,IAAIG,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,MAAM;YACN,aAAa;gBACXC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;gBAClBC,OAAOD,QAAQ,CAAC,EAAE;aACnB;QACH;QAEF,MAAM,IAAIN,MAAM,CAAC,2CAA2C,EAAEG,MAAM;IACtE;IAEA,IAAIK,WAAqB,EAAE;IAC3B,IAAI9B,MAAM,OAAO,CAACyB,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACxB;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAAC8B,GAAGC,EAAE,GAAG/B,KAAK,KAAK,CAAC;YAC1B6B,SAAS,IAAI,CAACD,OAAOE,EAAE,IAAI,KAAKF,OAAOG,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAO/B,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAAC8B,GAAGC,EAAE,GAAG/B,KAAK,KAAK,CAAC;YAC1B6B,SAAS,IAAI,CAACD,OAAOE,EAAE,IAAI,KAAKF,OAAOG,EAAE,IAAI;QAC/C,OACEF,SAAS,IAAI,CAACD,OAAO5B;IAEzB;SAEA6B,WAAWL;IAGb,IAAIK,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACL,MAAM;QACN,aAAa;YAACA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IACnE;IAGF,IACEA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QAAE,MAAM;QAAS,aAAa;YAACA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IAAC;IAGlE,IAAIL,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACL,MAAM;QACN,aAAa;YAACK,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;YAAEA,QAAQ,CAAC,EAAE;SAAC;IACnE;IAGF,MAAMG,MAAM,CAAC,oCAAoC,EAAEd,KAAK,SAAS,CAACM,MAAM,CAAC,CAAC;IAC1E,MAAM,IAAIH,MAAMW;AAClB;AAEA,SAASC,oBACPC,kBAAsC;IAEtC,OAAO;QACL,YAAYtB;QACZ,gBAAgB;YACd,uBAAuB;gBACrB;gBACA;gBACA;aACD;YACD,2BAA2B,CAAC,EAAEuB,gBAAgB,EAAEC,UAAU,EAAE,GAAM;oBAChE,QAAQ;wBACN,aAAaA,WAAW,WAAW,IAAID,iBAAiB,WAAW;oBACrE;gBACF;QACF;QACA,UAAU;YACR,MAAM;YACN,cAAc;YACd,6BAA6BxC;YAC7B,QAAQ,CAAC0C,iBAAiBC,UACxBC,eAAeF,iBAAiBC,SAASJ;QAC7C;QACA,QAAQ;YACN,eAAe;gBACb,aAAa;oBAAE,OAAO;oBAAQ,OAAO;oBAAM,cAAc;gBAAK;gBAC9D,qBAAqBX;YACvB;QACF;IACF;AACF;AAEA,MAAMiB,wBAAwBP,oBAC5BQ,mBAAmB,cAAc;AAG5B,MAAMC
,iBAAiB;IAC5B,eAAeT,oBAAoBQ,mBAAmB,IAAI;IAC1D,sBAAsBD;IACtB,0BAA0BA;AAC5B"}
@@ -2,22 +2,33 @@ import { getDebug } from "@midscene/shared/logger";
2
2
  import { transformHotkeyInput } from "@midscene/shared/us-keyboard-layout";
3
3
  import { assert } from "@midscene/shared/utils";
4
4
  import { actionParser } from "@ui-tars/action-parser";
5
- import { getSummary, getUiTarsPlanningPrompt } from "./prompt/ui-tars-planning.mjs";
6
- import { AIResponseParseError, callAIWithStringResponse } from "./service-caller/index.mjs";
5
+ import { getSummary, getUiTarsPlanningPrompt } from "../../prompt/ui-tars-planning.mjs";
6
+ import { AIResponseParseError, callAIWithStringResponse } from "../../service-caller/index.mjs";
7
+ import { finalizePixelBbox } from "../../shared/model-locate-result/bbox.mjs";
8
+ import { mapLocateResultToPixelBboxByCoordinates } from "../../shared/model-locate-result/pixel-bbox-mapper.mjs";
7
9
  const debug = getDebug('ui-tars-planning');
8
10
  const warnLog = getDebug('ui-tars-planning', {
9
11
  console: true
10
12
  });
11
- const bboxSize = 10;
12
- const pointToBbox = (point, width, height)=>[
13
- Math.round(Math.max(point.x - bboxSize / 2, 0)),
14
- Math.round(Math.max(point.y - bboxSize / 2, 0)),
15
- Math.round(Math.min(point.x + bboxSize / 2, width)),
16
- Math.round(Math.min(point.y + bboxSize / 2, height))
17
- ];
18
- async function uiTarsPlanning(userInstruction, options) {
19
- const { conversationHistory, context, modelConfig, actionContext } = options;
20
- const { uiTarsModelVersion } = modelConfig;
13
/**
 * Builds the locate parameter for an action from a point returned by the model.
 *
 * The point is mapped to a pixel bbox via
 * `mapLocateResultToPixelBboxByCoordinates` (declared as an `xy` point with
 * `normalizedBy: 1`) and then passed through `finalizePixelBbox`.
 *
 * @param {number[]} point - Point coordinates as produced by `getPoint`.
 * @param {string|undefined} thought - Model thought, used as the locate prompt (empty string when falsy).
 * @param {*} size - Screenshot size, supplied to the mapper as `preparedSize`.
 * @returns {{prompt: string, locatedPixelBbox: *}} The locate parameter.
 */
function pointToLocateParam(point, thought, size) {
    const mappingContext = {
        preparedSize: size
    };
    const pointResult = {
        type: 'point',
        coordinates: point
    };
    const pointCoordinateSpec = {
        shape: 'point',
        order: 'xy',
        normalizedBy: 1
    };
    const pixelBbox = mapLocateResultToPixelBboxByCoordinates(pointResult, mappingContext, pointCoordinateSpec);
    const locatedPixelBbox = finalizePixelBbox(pixelBbox, point, mappingContext);
    return {
        prompt: thought || '',
        locatedPixelBbox
    };
}
30
+ async function uiTarsPlanning(userInstruction, options, uiTarsModelVersion) {
31
+ const { conversationHistory, context, modelRuntime, actionContext } = options;
21
32
  let instruction = userInstruction;
22
33
  if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstruction}</user_instruction>`;
23
34
  const systemPrompt = getUiTarsPlanningPrompt() + instruction;
@@ -39,7 +50,7 @@ async function uiTarsPlanning(userInstruction, options) {
39
50
  content: systemPrompt
40
51
  },
41
52
  ...conversationHistory.snapshot()
42
- ], modelConfig, {
53
+ ], modelRuntime, {
43
54
  abortSignal: options.abortSignal
44
55
  });
45
56
  let convertedText;
@@ -73,76 +84,46 @@ async function uiTarsPlanning(userInstruction, options) {
73
84
  const actionType = (action.action_type || '').toLowerCase();
74
85
  if ('click' === actionType) {
75
86
  assert(action.action_inputs.start_box, 'start_box is required');
76
- const point = getPoint(action.action_inputs.start_box, shotSize);
77
- const locate = {
78
- prompt: action.thought || '',
79
- bbox: pointToBbox({
80
- x: point[0],
81
- y: point[1]
82
- }, shotSize.width, shotSize.height)
83
- };
87
+ const point = getPoint(action.action_inputs.start_box);
88
+ const locate = pointToLocateParam(point, action.thought, shotSize);
84
89
  transformActions.push({
85
90
  type: 'Tap',
86
91
  param: {
87
- locate: locate
92
+ locate
88
93
  }
89
94
  });
90
95
  } else if ('left_double' === actionType) {
91
96
  assert(action.action_inputs.start_box, 'start_box is required');
92
- const point = getPoint(action.action_inputs.start_box, shotSize);
93
- const locate = {
94
- prompt: action.thought || '',
95
- bbox: pointToBbox({
96
- x: point[0],
97
- y: point[1]
98
- }, shotSize.width, shotSize.height)
99
- };
97
+ const point = getPoint(action.action_inputs.start_box);
98
+ const locate = pointToLocateParam(point, action.thought, shotSize);
100
99
  transformActions.push({
101
100
  type: 'DoubleClick',
102
101
  param: {
103
- locate: locate
102
+ locate
104
103
  },
105
104
  thought: action.thought || ''
106
105
  });
107
106
  } else if ('right_single' === actionType) {
108
107
  assert(action.action_inputs.start_box, 'start_box is required');
109
- const point = getPoint(action.action_inputs.start_box, shotSize);
110
- const locate = {
111
- prompt: action.thought || '',
112
- bbox: pointToBbox({
113
- x: point[0],
114
- y: point[1]
115
- }, shotSize.width, shotSize.height)
116
- };
108
+ const point = getPoint(action.action_inputs.start_box);
109
+ const locate = pointToLocateParam(point, action.thought, shotSize);
117
110
  transformActions.push({
118
111
  type: 'RightClick',
119
112
  param: {
120
- locate: locate
113
+ locate
121
114
  },
122
115
  thought: action.thought || ''
123
116
  });
124
117
  } else if ('drag' === actionType) {
125
118
  assert(action.action_inputs.start_box, 'start_box is required');
126
119
  assert(action.action_inputs.end_box, 'end_box is required');
127
- const startPoint = getPoint(action.action_inputs.start_box, shotSize);
128
- const endPoint = getPoint(action.action_inputs.end_box, shotSize);
120
+ const startPoint = getPoint(action.action_inputs.start_box);
121
+ const endPoint = getPoint(action.action_inputs.end_box);
129
122
  transformActions.push({
130
123
  type: 'DragAndDrop',
131
124
  param: {
132
- from: {
133
- prompt: action.thought || '',
134
- bbox: pointToBbox({
135
- x: startPoint[0],
136
- y: startPoint[1]
137
- }, shotSize.width, shotSize.height)
138
- },
139
- to: {
140
- prompt: action.thought || '',
141
- bbox: pointToBbox({
142
- x: endPoint[0],
143
- y: endPoint[1]
144
- }, shotSize.width, shotSize.height)
145
- }
125
+ from: pointToLocateParam(startPoint, action.thought, shotSize),
126
+ to: pointToLocateParam(endPoint, action.thought, shotSize)
146
127
  },
147
128
  thought: action.thought || ''
148
129
  });
@@ -165,7 +146,7 @@ async function uiTarsPlanning(userInstruction, options) {
165
146
  transformActions.push({
166
147
  type: 'Finished',
167
148
  param: {},
168
- thought: action.thought || ''
149
+ thought: action.action_inputs.content || action.thought || ''
169
150
  });
170
151
  } else if ('hotkey' === actionType) if (action.action_inputs.key) {
171
152
  const keys = transformHotkeyInput(action.action_inputs.key);
@@ -234,16 +215,17 @@ function convertBboxToCoordinates(text) {
234
215
  const y = Math.floor((y1Num + y2Num) / 2);
235
216
  return `(${x},${y})`;
236
217
  }
237
- const cleanedText = text.replace(/\[EOS\]/g, '');
218
+ const cleanedText = text.replace(/\[EOS\]/g, '').replace(/```(?:[a-zA-Z0-9_-]+)?/g, '');
238
219
  return cleanedText.replace(pattern, replaceMatch).trim();
239
220
  }
240
- function getPoint(startBox, size) {
221
+ function getPoint(startBox) {
241
222
  const [x, y] = JSON.parse(startBox);
223
+ assert('number' == typeof x && Number.isFinite(x) && 'number' == typeof y && Number.isFinite(y), `invalid point data for ui-tars planning: ${startBox}`);
242
224
  return [
243
- x * size.width,
244
- y * size.height
225
+ x,
226
+ y
245
227
  ];
246
228
  }
247
229
  export { uiTarsPlanning };
248
230
 
249
- //# sourceMappingURL=ui-tars-planning.mjs.map
231
+ //# sourceMappingURL=planning.mjs.map