@aiscene/core 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +753 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/execution-session.mjs +41 -0
  7. package/dist/es/agent/execution-session.mjs.map +1 -0
  8. package/dist/es/agent/index.mjs +6 -0
  9. package/dist/es/agent/task-builder.mjs +332 -0
  10. package/dist/es/agent/task-builder.mjs.map +1 -0
  11. package/dist/es/agent/task-cache.mjs +214 -0
  12. package/dist/es/agent/task-cache.mjs.map +1 -0
  13. package/dist/es/agent/tasks.mjs +423 -0
  14. package/dist/es/agent/tasks.mjs.map +1 -0
  15. package/dist/es/agent/ui-utils.mjs +91 -0
  16. package/dist/es/agent/ui-utils.mjs.map +1 -0
  17. package/dist/es/agent/utils.mjs +169 -0
  18. package/dist/es/agent/utils.mjs.map +1 -0
  19. package/dist/es/ai-model/auto-glm/actions.mjs +239 -0
  20. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  21. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  22. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  23. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  24. package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
  25. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  26. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  27. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  28. package/dist/es/ai-model/auto-glm/util.mjs +9 -0
  29. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  30. package/dist/es/ai-model/connectivity.mjs +138 -0
  31. package/dist/es/ai-model/connectivity.mjs.map +1 -0
  32. package/dist/es/ai-model/conversation-history.mjs +195 -0
  33. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  34. package/dist/es/ai-model/index.mjs +12 -0
  35. package/dist/es/ai-model/inspect.mjs +397 -0
  36. package/dist/es/ai-model/inspect.mjs.map +1 -0
  37. package/dist/es/ai-model/llm-planning.mjs +233 -0
  38. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  39. package/dist/es/ai-model/prompt/common.mjs +7 -0
  40. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  41. package/dist/es/ai-model/prompt/describe.mjs +66 -0
  42. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  43. package/dist/es/ai-model/prompt/extraction.mjs +131 -0
  44. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  45. package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
  46. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  47. package/dist/es/ai-model/prompt/llm-planning.mjs +568 -0
  48. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  49. package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
  50. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  52. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  53. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  54. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  55. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  56. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  57. package/dist/es/ai-model/prompt/util.mjs +59 -0
  58. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  59. package/dist/es/ai-model/prompt/yaml-generator.mjs +203 -0
  60. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  61. package/dist/es/ai-model/service-caller/codex-app-server.mjs +575 -0
  62. package/dist/es/ai-model/service-caller/codex-app-server.mjs.map +1 -0
  63. package/dist/es/ai-model/service-caller/image-detail.mjs +6 -0
  64. package/dist/es/ai-model/service-caller/image-detail.mjs.map +1 -0
  65. package/dist/es/ai-model/service-caller/index.mjs +475 -0
  66. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  67. package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
  68. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  69. package/dist/es/common.mjs +371 -0
  70. package/dist/es/common.mjs.map +1 -0
  71. package/dist/es/device/device-options.mjs +0 -0
  72. package/dist/es/device/index.mjs +341 -0
  73. package/dist/es/device/index.mjs.map +1 -0
  74. package/dist/es/dump/html-utils.mjs +292 -0
  75. package/dist/es/dump/html-utils.mjs.map +1 -0
  76. package/dist/es/dump/index.mjs +3 -0
  77. package/dist/es/dump/screenshot-restoration.mjs +32 -0
  78. package/dist/es/dump/screenshot-restoration.mjs.map +1 -0
  79. package/dist/es/dump/screenshot-store.mjs +126 -0
  80. package/dist/es/dump/screenshot-store.mjs.map +1 -0
  81. package/dist/es/index.mjs +19 -0
  82. package/dist/es/index.mjs.map +1 -0
  83. package/dist/es/report-cli.mjs +151 -0
  84. package/dist/es/report-cli.mjs.map +1 -0
  85. package/dist/es/report-generator.mjs +205 -0
  86. package/dist/es/report-generator.mjs.map +1 -0
  87. package/dist/es/report-markdown.mjs +218 -0
  88. package/dist/es/report-markdown.mjs.map +1 -0
  89. package/dist/es/report.mjs +270 -0
  90. package/dist/es/report.mjs.map +1 -0
  91. package/dist/es/screenshot-item.mjs +122 -0
  92. package/dist/es/screenshot-item.mjs.map +1 -0
  93. package/dist/es/service/index.mjs +274 -0
  94. package/dist/es/service/index.mjs.map +1 -0
  95. package/dist/es/service/utils.mjs +15 -0
  96. package/dist/es/service/utils.mjs.map +1 -0
  97. package/dist/es/skill/index.mjs +38 -0
  98. package/dist/es/skill/index.mjs.map +1 -0
  99. package/dist/es/task-runner.mjs +263 -0
  100. package/dist/es/task-runner.mjs.map +1 -0
  101. package/dist/es/task-timing.mjs +12 -0
  102. package/dist/es/task-timing.mjs.map +1 -0
  103. package/dist/es/tree.mjs +13 -0
  104. package/dist/es/tree.mjs.map +1 -0
  105. package/dist/es/types.mjs +204 -0
  106. package/dist/es/types.mjs.map +1 -0
  107. package/dist/es/utils.mjs +234 -0
  108. package/dist/es/utils.mjs.map +1 -0
  109. package/dist/es/yaml/builder.mjs +13 -0
  110. package/dist/es/yaml/builder.mjs.map +1 -0
  111. package/dist/es/yaml/index.mjs +4 -0
  112. package/dist/es/yaml/player.mjs +442 -0
  113. package/dist/es/yaml/player.mjs.map +1 -0
  114. package/dist/es/yaml/utils.mjs +102 -0
  115. package/dist/es/yaml/utils.mjs.map +1 -0
  116. package/dist/es/yaml.mjs +0 -0
  117. package/dist/lib/agent/agent.js +801 -0
  118. package/dist/lib/agent/agent.js.map +1 -0
  119. package/dist/lib/agent/common.js +5 -0
  120. package/dist/lib/agent/execution-session.js +75 -0
  121. package/dist/lib/agent/execution-session.js.map +1 -0
  122. package/dist/lib/agent/index.js +78 -0
  123. package/dist/lib/agent/index.js.map +1 -0
  124. package/dist/lib/agent/task-builder.js +369 -0
  125. package/dist/lib/agent/task-builder.js.map +1 -0
  126. package/dist/lib/agent/task-cache.js +266 -0
  127. package/dist/lib/agent/task-cache.js.map +1 -0
  128. package/dist/lib/agent/tasks.js +466 -0
  129. package/dist/lib/agent/tasks.js.map +1 -0
  130. package/dist/lib/agent/ui-utils.js +143 -0
  131. package/dist/lib/agent/ui-utils.js.map +1 -0
  132. package/dist/lib/agent/utils.js +240 -0
  133. package/dist/lib/agent/utils.js.map +1 -0
  134. package/dist/lib/ai-model/auto-glm/actions.js +273 -0
  135. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  136. package/dist/lib/ai-model/auto-glm/index.js +66 -0
  137. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  138. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  139. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  140. package/dist/lib/ai-model/auto-glm/planning.js +105 -0
  141. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  142. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  143. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  144. package/dist/lib/ai-model/auto-glm/util.js +46 -0
  145. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  146. package/dist/lib/ai-model/connectivity.js +182 -0
  147. package/dist/lib/ai-model/connectivity.js.map +1 -0
  148. package/dist/lib/ai-model/conversation-history.js +229 -0
  149. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  150. package/dist/lib/ai-model/index.js +129 -0
  151. package/dist/lib/ai-model/index.js.map +1 -0
  152. package/dist/lib/ai-model/inspect.js +443 -0
  153. package/dist/lib/ai-model/inspect.js.map +1 -0
  154. package/dist/lib/ai-model/llm-planning.js +270 -0
  155. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/common.js +41 -0
  157. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  158. package/dist/lib/ai-model/prompt/describe.js +100 -0
  159. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  160. package/dist/lib/ai-model/prompt/extraction.js +171 -0
  161. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  162. package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
  163. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  164. package/dist/lib/ai-model/prompt/llm-planning.js +605 -0
  165. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
  167. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  168. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  169. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  170. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  171. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  172. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  173. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  174. package/dist/lib/ai-model/prompt/util.js +105 -0
  175. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  176. package/dist/lib/ai-model/prompt/yaml-generator.js +264 -0
  177. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  178. package/dist/lib/ai-model/service-caller/codex-app-server.js +624 -0
  179. package/dist/lib/ai-model/service-caller/codex-app-server.js.map +1 -0
  180. package/dist/lib/ai-model/service-caller/image-detail.js +40 -0
  181. package/dist/lib/ai-model/service-caller/image-detail.js.map +1 -0
  182. package/dist/lib/ai-model/service-caller/index.js +540 -0
  183. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  184. package/dist/lib/ai-model/ui-tars-planning.js +283 -0
  185. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  186. package/dist/lib/common.js +480 -0
  187. package/dist/lib/common.js.map +1 -0
  188. package/dist/lib/device/device-options.js +20 -0
  189. package/dist/lib/device/device-options.js.map +1 -0
  190. package/dist/lib/device/index.js +468 -0
  191. package/dist/lib/device/index.js.map +1 -0
  192. package/dist/lib/dump/html-utils.js +368 -0
  193. package/dist/lib/dump/html-utils.js.map +1 -0
  194. package/dist/lib/dump/index.js +60 -0
  195. package/dist/lib/dump/index.js.map +1 -0
  196. package/dist/lib/dump/screenshot-restoration.js +66 -0
  197. package/dist/lib/dump/screenshot-restoration.js.map +1 -0
  198. package/dist/lib/dump/screenshot-store.js +166 -0
  199. package/dist/lib/dump/screenshot-store.js.map +1 -0
  200. package/dist/lib/index.js +186 -0
  201. package/dist/lib/index.js.map +1 -0
  202. package/dist/lib/report-cli.js +191 -0
  203. package/dist/lib/report-cli.js.map +1 -0
  204. package/dist/lib/report-generator.js +246 -0
  205. package/dist/lib/report-generator.js.map +1 -0
  206. package/dist/lib/report-markdown.js +255 -0
  207. package/dist/lib/report-markdown.js.map +1 -0
  208. package/dist/lib/report.js +316 -0
  209. package/dist/lib/report.js.map +1 -0
  210. package/dist/lib/screenshot-item.js +156 -0
  211. package/dist/lib/screenshot-item.js.map +1 -0
  212. package/dist/lib/service/index.js +308 -0
  213. package/dist/lib/service/index.js.map +1 -0
  214. package/dist/lib/service/utils.js +49 -0
  215. package/dist/lib/service/utils.js.map +1 -0
  216. package/dist/lib/skill/index.js +72 -0
  217. package/dist/lib/skill/index.js.map +1 -0
  218. package/dist/lib/task-runner.js +300 -0
  219. package/dist/lib/task-runner.js.map +1 -0
  220. package/dist/lib/task-timing.js +46 -0
  221. package/dist/lib/task-timing.js.map +1 -0
  222. package/dist/lib/tree.js +53 -0
  223. package/dist/lib/tree.js.map +1 -0
  224. package/dist/lib/types.js +300 -0
  225. package/dist/lib/types.js.map +1 -0
  226. package/dist/lib/utils.js +316 -0
  227. package/dist/lib/utils.js.map +1 -0
  228. package/dist/lib/yaml/builder.js +57 -0
  229. package/dist/lib/yaml/builder.js.map +1 -0
  230. package/dist/lib/yaml/index.js +81 -0
  231. package/dist/lib/yaml/index.js.map +1 -0
  232. package/dist/lib/yaml/player.js +476 -0
  233. package/dist/lib/yaml/player.js.map +1 -0
  234. package/dist/lib/yaml/utils.js +155 -0
  235. package/dist/lib/yaml/utils.js.map +1 -0
  236. package/dist/lib/yaml.js +20 -0
  237. package/dist/lib/yaml.js.map +1 -0
  238. package/dist/types/agent/agent.d.ts +216 -0
  239. package/dist/types/agent/common.d.ts +0 -0
  240. package/dist/types/agent/execution-session.d.ts +36 -0
  241. package/dist/types/agent/index.d.ts +9 -0
  242. package/dist/types/agent/task-builder.d.ts +34 -0
  243. package/dist/types/agent/task-cache.d.ts +49 -0
  244. package/dist/types/agent/tasks.d.ts +69 -0
  245. package/dist/types/agent/ui-utils.d.ts +14 -0
  246. package/dist/types/agent/utils.d.ts +25 -0
  247. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  248. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  249. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  250. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  251. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  252. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  253. package/dist/types/ai-model/connectivity.d.ts +20 -0
  254. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  255. package/dist/types/ai-model/index.d.ts +16 -0
  256. package/dist/types/ai-model/inspect.d.ts +67 -0
  257. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  258. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  259. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  260. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  261. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  262. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  263. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  264. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  265. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  266. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  267. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  268. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  269. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  270. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  271. package/dist/types/ai-model/service-caller/index.d.ts +49 -0
  272. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  273. package/dist/types/common.d.ts +288 -0
  274. package/dist/types/device/device-options.d.ts +145 -0
  275. package/dist/types/device/index.d.ts +2528 -0
  276. package/dist/types/dump/html-utils.d.ts +75 -0
  277. package/dist/types/dump/index.d.ts +5 -0
  278. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  279. package/dist/types/dump/screenshot-store.d.ts +49 -0
  280. package/dist/types/index.d.ts +21 -0
  281. package/dist/types/report-cli.d.ts +36 -0
  282. package/dist/types/report-generator.d.ts +81 -0
  283. package/dist/types/report-markdown.d.ts +24 -0
  284. package/dist/types/report.d.ts +52 -0
  285. package/dist/types/screenshot-item.d.ts +67 -0
  286. package/dist/types/service/index.d.ts +24 -0
  287. package/dist/types/service/utils.d.ts +2 -0
  288. package/dist/types/skill/index.d.ts +25 -0
  289. package/dist/types/task-runner.d.ts +50 -0
  290. package/dist/types/task-timing.d.ts +8 -0
  291. package/dist/types/tree.d.ts +4 -0
  292. package/dist/types/types.d.ts +681 -0
  293. package/dist/types/utils.d.ts +45 -0
  294. package/dist/types/yaml/builder.d.ts +2 -0
  295. package/dist/types/yaml/index.d.ts +4 -0
  296. package/dist/types/yaml/player.d.ts +34 -0
  297. package/dist/types/yaml/utils.d.ts +9 -0
  298. package/dist/types/yaml.d.ts +215 -0
  299. package/package.json +111 -0
@@ -0,0 +1,155 @@
// Bundler-emitted runtime helpers attached to a local `__webpack_require__` object.
// (The embedded source map lists them as webpack/runtime/* sources.)
1
+ "use strict";
2
+ var __webpack_require__ = {};
// n(module): builds a getter that yields module['default'] when module.__esModule is truthy,
// otherwise the module itself; the getter is also exposed on itself as property `a`.
3
+ (()=>{
4
+ __webpack_require__.n = (module)=>{
5
+ var getter = module && module.__esModule ? ()=>module['default'] : ()=>module;
6
+ __webpack_require__.d(getter, {
7
+ a: getter
8
+ });
9
+ return getter;
10
+ };
11
+ })();
// d(exports1, definition): for every own key of `definition` that `exports1` does not already
// own, defines an enumerable accessor on `exports1` whose getter is definition[key].
12
+ (()=>{
13
+ __webpack_require__.d = (exports1, definition)=>{
14
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
15
+ enumerable: true,
16
+ get: definition[key]
17
+ });
18
+ };
19
+ })();
// o(obj, prop): own-property check that goes through Object.prototype.hasOwnProperty.
20
+ (()=>{
21
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
22
+ })();
// r(exports1): tags the object as an ES module namespace: sets Symbol.toStringTag to 'Module'
// when Symbol is available, and always defines `__esModule: true`.
// `"u" > typeof Symbol` is the minified form of `typeof Symbol !== 'undefined'`.
23
+ (()=>{
24
+ __webpack_require__.r = (exports1)=>{
25
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
26
+ value: 'Module'
27
+ });
28
+ Object.defineProperty(exports1, '__esModule', {
29
+ value: true
30
+ });
31
+ };
32
+ })();
// Module export table: the namespace object is marked as an ES module, then each public
// function is registered as a lazy getter (the functions themselves are declared further down).
33
+ var __webpack_exports__ = {};
34
+ __webpack_require__.r(__webpack_exports__);
35
+ __webpack_require__.d(__webpack_exports__, {
36
+ buildDetailedLocateParam: ()=>buildDetailedLocateParam,
37
+ buildDetailedLocateParamAndRestParams: ()=>buildDetailedLocateParamAndRestParams,
38
+ parseYamlScript: ()=>parseYamlScript,
39
+ interpolateEnvVars: ()=>interpolateEnvVars
40
+ });
// External dependencies, loaded through CommonJS require.
41
+ const logger_namespaceObject = require("@midscene/shared/logger");
42
+ const utils_namespaceObject = require("@midscene/shared/utils");
43
+ const external_js_yaml_namespaceObject = require("js-yaml");
// Default-export accessor for js-yaml (see __webpack_require__.n); call it to obtain the library object.
44
+ var external_js_yaml_default = /*#__PURE__*/ __webpack_require__.n(external_js_yaml_namespaceObject);
// Debug logger created under the 'yaml:utils' namespace.
45
+ const debugUtils = (0, logger_namespaceObject.getDebug)('yaml:utils');
// Option fields that extractMultimodalPrompt copies out of a locate option object.
// Only the keys are used; the `true` values are placeholders.
46
+ const multimodalLocateOptionFieldMap = {
47
+ images: true,
48
+ convertHttpImage2Base64: true
49
+ };
50
+ const multimodalLocateOptionKeys = Object.keys(multimodalLocateOptionFieldMap);
51
/**
 * Collects the multimodal fields (the keys listed in `multimodalLocateOptionKeys`)
 * that are set on a locate option object.
 *
 * @param opt Locate options; anything that is not a non-null object yields `undefined`.
 * @returns A new object holding only the multimodal fields whose value is not
 *          `undefined`, or `undefined` when none of them is set.
 */
function extractMultimodalPrompt(opt) {
    if (opt === null || typeof opt !== 'object') return undefined;

    const picked = {};
    let pickedAny = false;
    for (const field of multimodalLocateOptionKeys) {
        const fieldValue = opt[field];
        // Fields explicitly set to `undefined` are treated the same as absent ones.
        if (fieldValue === undefined) continue;
        picked[field] = fieldValue;
        pickedAny = true;
    }
    return pickedAny ? picked : undefined;
}
59
/**
 * Replaces every `${NAME}` placeholder in `content` with `process.env.NAME`.
 *
 * Lines whose first non-whitespace character is `#` (full-line YAML comments)
 * are passed through untouched, so placeholders inside them are never resolved.
 * Whitespace around the name inside the braces is ignored.
 *
 * @param content Text to process; handled one `\n`-separated line at a time.
 * @returns The text with all placeholders on non-comment lines substituted.
 * @throws {Error} When a referenced environment variable is not defined.
 */
function interpolateEnvVars(content) {
    const output = [];
    for (const rawLine of content.split('\n')) {
        if (rawLine.trimStart().startsWith('#')) {
            // Comment line: keep verbatim.
            output.push(rawLine);
            continue;
        }
        const expanded = rawLine.replace(/\$\{([^}]+)\}/g, (_placeholder, rawName) => {
            const resolved = process.env[rawName.trim()];
            if (resolved === undefined) {
                throw new Error(`Environment variable "${rawName.trim()}" is not defined`);
            }
            return resolved;
        });
        output.push(expanded);
    }
    return output.join('\n');
}
72
/**
 * Parses a YAML automation script and checks that it declares a `tasks` array.
 *
 * Steps:
 *  1. When the text mentions "android", bare-integer `deviceId` values are
 *     wrapped in single quotes so they load as strings, and a warning asks the
 *     author to quote them.
 *  2. `${VAR}` placeholders are expanded through `interpolateEnvVars`.
 *  3. The text is loaded with js-yaml using `JSON_SCHEMA`.
 *  4. `tasks` must be present and must be an array.
 *
 * @param content  Raw YAML text.
 * @param filePath Optional path; only used to enrich the "tasks is required" message.
 * @returns The loaded script object.
 * @throws Errors raised by `interpolateEnvVars` or js-yaml, plus the assertion
 *         failures below (assumes the shared `assert` throws on a falsy value — TODO confirm).
 */
function parseYamlScript(content, filePath) {
    // Match a deviceId only when the digits are the WHOLE scalar, i.e. followed by
    // end of line, a ` #` comment, or a flow-style `,` / `}`. A plain prefix match
    // rewrote `deviceId: 0123456789ABCDEF` to `deviceId: '0123456789'ABCDEF` and
    // `deviceId: 192.168.1.5:5555` to `deviceId: '192'.168.1.5:5555`, corrupting the YAML.
    const bareNumericDeviceId = /deviceId:\s*(\d+)(?=[ \t]*(?:[,}]|$)|[ \t]+#)/gm;

    let processedContent = content;
    if (-1 !== content.indexOf('android')) {
        let matchedDeviceId;
        const quotedContent = content.replace(bareNumericDeviceId, (match, deviceId)=>{
            matchedDeviceId = deviceId;
            return `deviceId: '${deviceId}'`;
        });
        // Only warn (and only swap the content) when something was actually rewritten.
        if (void 0 !== matchedDeviceId) {
            processedContent = quotedContent;
            console.warn(`please use string-style deviceId in yaml script, for example: deviceId: "${matchedDeviceId}"`);
        }
    }

    // NOTE(review): quoting runs before env interpolation, so `deviceId: ${VAR}` with a
    // numeric value is not quoted here — confirm whether that is intended.
    const interpolatedContent = interpolateEnvVars(processedContent);
    const obj = external_js_yaml_default().load(interpolatedContent, {
        schema: external_js_yaml_default().JSON_SCHEMA
    });

    const pathTip = filePath ? `, failed to load ${filePath}` : '';
    // `obj` is undefined/null for an empty or comment-only document; optional chaining
    // routes that case into the assertion message instead of a raw TypeError.
    (0, utils_namespaceObject.assert)(obj?.tasks, `property "tasks" is required in yaml script ${pathTip}`);
    (0, utils_namespaceObject.assert)(Array.isArray(obj.tasks), `property "tasks" must be an array in yaml script, but got ${obj.tasks}`);
    return obj;
}
91
/**
 * Normalizes a locate prompt plus its options into a detailed locate parameter.
 *
 * Prompt resolution order: `locatePrompt` (an object of the form `{ prompt }`
 * with no other fields is unwrapped to its inner prompt), then `opt.prompt`,
 * then `opt.locate` as a shortcut. Multimodal fields found on `opt`
 * (see `extractMultimodalPrompt`) are merged into the prompt, turning a string
 * prompt into `{ prompt, ...fields }`.
 *
 * @param locatePrompt Prompt as a string or as an object carrying a `prompt` field.
 * @param opt Optional locate options. `deepThink` is accepted as a fallback for
 *            `deepLocate`; `cacheable` defaults to `true`.
 * @returns `{ prompt, deepLocate, cacheable, xpath }`, or `undefined` when no
 *          prompt could be resolved.
 */
function buildDetailedLocateParam(locatePrompt, opt) {
    debugUtils('will call buildDetailedLocateParam', locatePrompt, opt);

    const hasOptions = 'object' == typeof opt && null !== opt;
    const isPromptObject = 'object' == typeof locatePrompt && null !== locatePrompt && 'prompt' in locatePrompt;

    // Unwrap `{ prompt: '...' }` when it carries nothing else, to avoid
    // double nesting such as `{ prompt: { prompt: '...' } }` in the result.
    let normalizedLocatePrompt = locatePrompt;
    if (isPromptObject) {
        const { prompt: innerPrompt, ...rest } = locatePrompt;
        const hasMultimodalFields = Object.keys(rest).length > 0;
        normalizedLocatePrompt = hasMultimodalFields ? locatePrompt : innerPrompt;
    }
    // Textual part of the positional prompt, used only for the conflict check below.
    const locatePromptText = isPromptObject ? locatePrompt.prompt : locatePrompt;

    let prompt = normalizedLocatePrompt || opt?.prompt || opt?.locate;
    let deepLocate = false;
    let cacheable = true;
    let xpath;

    if (hasOptions) {
        deepLocate = opt.deepLocate ?? opt.deepThink ?? false;
        cacheable = opt.cacheable ?? true;
        xpath = opt.xpath;
        // Warn only on a real mismatch. Comparing the raw object-form prompt against
        // `opt.prompt` alone would flag `{ prompt: 'x' }` vs `opt.prompt === 'x'` as a conflict.
        const conflicting = locatePrompt && opt.prompt && locatePrompt !== opt.prompt && locatePromptText !== opt.prompt;
        if (conflicting) console.warn('conflict prompt for item', locatePrompt, opt, 'maybe you put the prompt in the wrong place');
    }

    if (!prompt) {
        debugUtils('no prompt, will return undefined in buildDetailedLocateParam', opt);
        return;
    }

    const multimodalPrompt = extractMultimodalPrompt(opt);
    if (multimodalPrompt) {
        // Fields taken from `opt` override same-named fields already on an object prompt.
        prompt = 'string' == typeof prompt ? {
            prompt,
            ...multimodalPrompt
        } : {
            ...prompt,
            ...multimodalPrompt
        };
    }

    return {
        prompt,
        deepLocate,
        cacheable,
        xpath
    };
}
126
/**
 * Builds the detailed locate parameter and also returns every option that was
 * not consumed while building it.
 *
 * A key of `opt` is left out of `restParams` when it is a key of the built
 * locate parameter, a multimodal field that was merged into an object prompt,
 * listed in `excludeKeys`, or the `locate` shortcut.
 *
 * @param locatePrompt Prompt forwarded to `buildDetailedLocateParam`.
 * @param opt Locate options; a non-object value produces empty `restParams`.
 * @param excludeKeys Additional option keys to keep out of `restParams`.
 * @returns `{ locateParam, restParams }`.
 */
function buildDetailedLocateParamAndRestParams(locatePrompt, opt, excludeKeys = []) {
    const multimodalPrompt = extractMultimodalPrompt(opt);
    const locateParam = buildDetailedLocateParam(locatePrompt, opt);
    const restParams = {};

    if (null === opt || 'object' != typeof opt) {
        return {
            locateParam,
            restParams
        };
    }

    const consumedByLocate = Object.keys(locateParam || {});
    const promptIsObject = 'object' == typeof locateParam?.prompt && locateParam?.prompt !== null;
    const consumedByMultimodal = promptIsObject ? Object.keys(multimodalPrompt || {}) : [];

    for (const optionKey of Object.keys(opt)) {
        const alreadyHandled = consumedByLocate.includes(optionKey) || consumedByMultimodal.includes(optionKey) || excludeKeys.includes(optionKey) || 'locate' === optionKey;
        if (alreadyHandled) continue;
        restParams[optionKey] = opt[optionKey];
    }

    return {
        locateParam,
        restParams
    };
}
141
// CommonJS export wiring: the four named exports are copied from the namespace object explicitly.
+ exports.buildDetailedLocateParam = __webpack_exports__.buildDetailedLocateParam;
142
+ exports.buildDetailedLocateParamAndRestParams = __webpack_exports__.buildDetailedLocateParamAndRestParams;
143
+ exports.interpolateEnvVars = __webpack_exports__.interpolateEnvVars;
144
+ exports.parseYamlScript = __webpack_exports__.parseYamlScript;
// Any other enumerable key on the namespace object (i.e. not one of the four above) is copied as well.
145
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
146
+ "buildDetailedLocateParam",
147
+ "buildDetailedLocateParamAndRestParams",
148
+ "interpolateEnvVars",
149
+ "parseYamlScript"
150
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
// Mark the CommonJS exports object itself as an ES module for interop consumers.
151
+ Object.defineProperty(exports, '__esModule', {
152
+ value: true
153
+ });
154
+
155
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yaml/utils.js","sources":["webpack/runtime/compat_get_default_export","webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/yaml/utils.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type {\n DetailedLocateParam,\n LocateOption,\n MidsceneYamlScript,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport yaml from 'js-yaml';\n\nconst debugUtils = getDebug('yaml:utils');\n\nconst multimodalLocateOptionFieldMap: Record<keyof TMultimodalPrompt, true> = {\n images: true,\n convertHttpImage2Base64: true,\n};\n\nconst multimodalLocateOptionKeys = Object.keys(\n multimodalLocateOptionFieldMap,\n) as Array<keyof TMultimodalPrompt>;\n\nfunction extractMultimodalPrompt(\n opt?: LocateOption,\n): Partial<TMultimodalPrompt> | undefined {\n if (typeof opt !== 'object' || opt === null) {\n return undefined;\n }\n\n const entries = 
multimodalLocateOptionKeys\n .map((key) => [key, opt[key]] as const)\n .filter(([, value]) => value !== undefined);\n\n return entries.length\n ? (Object.fromEntries(entries) as Partial<TMultimodalPrompt>)\n : undefined;\n}\n\nexport function interpolateEnvVars(content: string): string {\n // Process line by line to skip commented lines\n const lines = content.split('\\n');\n const processedLines = lines.map((line) => {\n // Check if the line is a YAML comment (starts with # after optional whitespace)\n const trimmedLine = line.trimStart();\n if (trimmedLine.startsWith('#')) {\n // Skip interpolation for comment lines\n return line;\n }\n\n // Process environment variables for non-comment lines\n return line.replace(/\\$\\{([^}]+)\\}/g, (_, envVar) => {\n const value = process.env[envVar.trim()];\n if (value === undefined) {\n throw new Error(\n `Environment variable \"${envVar.trim()}\" is not defined`,\n );\n }\n return value;\n });\n });\n\n return processedLines.join('\\n');\n}\n\nexport function parseYamlScript(\n content: string,\n filePath?: string,\n): MidsceneYamlScript {\n let processedContent = content;\n if (content.indexOf('android') !== -1 && content.match(/deviceId:\\s*(\\d+)/)) {\n let matchedDeviceId;\n processedContent = content.replace(\n /deviceId:\\s*(\\d+)/g,\n (match, deviceId) => {\n matchedDeviceId = deviceId;\n return `deviceId: '${deviceId}'`;\n },\n );\n console.warn(\n `please use string-style deviceId in yaml script, for example: deviceId: \"${matchedDeviceId}\"`,\n );\n }\n const interpolatedContent = interpolateEnvVars(processedContent);\n const obj = yaml.load(interpolatedContent, {\n schema: yaml.JSON_SCHEMA,\n }) as MidsceneYamlScript;\n\n const pathTip = filePath ? 
`, failed to load ${filePath}` : '';\n assert(obj.tasks, `property \"tasks\" is required in yaml script ${pathTip}`);\n assert(\n Array.isArray(obj.tasks),\n `property \"tasks\" must be an array in yaml script, but got ${obj.tasks}`,\n );\n return obj;\n}\n\nexport function buildDetailedLocateParam(\n locatePrompt: TUserPrompt,\n opt?: LocateOption,\n): DetailedLocateParam | undefined {\n debugUtils('will call buildDetailedLocateParam', locatePrompt, opt);\n // Normalize object-form TUserPrompt: when the object only contains a\n // `prompt` string (no multimodal fields like `images`), unwrap it to\n // avoid double nesting like { prompt: { prompt: '...' } }.\n let normalizedLocatePrompt: TUserPrompt = locatePrompt;\n if (\n typeof locatePrompt === 'object' &&\n locatePrompt !== null &&\n 'prompt' in locatePrompt\n ) {\n const { prompt: innerPrompt, ...rest } = locatePrompt;\n const hasMultimodalFields = Object.keys(rest).length > 0;\n normalizedLocatePrompt = hasMultimodalFields ? locatePrompt : innerPrompt;\n }\n\n let prompt = normalizedLocatePrompt || opt?.prompt || (opt as any)?.locate; // as a shortcut\n let deepLocate = false;\n let cacheable = true;\n let xpath = undefined;\n\n if (typeof opt === 'object' && opt !== null) {\n // Backward-compatible: accept `deepThink` as a deprecated alias for `deepLocate`.\n // All downstream code works on `deepLocate` only; the compatibility resolution\n // is intentionally kept here at the entry point so it does not bleed through\n // the rest of the call stack.\n deepLocate = opt.deepLocate ?? opt.deepThink ?? false;\n cacheable = opt.cacheable ?? 
true;\n xpath = opt.xpath;\n if (locatePrompt && opt.prompt && locatePrompt !== opt.prompt) {\n console.warn(\n 'conflict prompt for item',\n locatePrompt,\n opt,\n 'maybe you put the prompt in the wrong place',\n );\n }\n prompt = prompt || opt.prompt;\n }\n\n if (!prompt) {\n debugUtils(\n 'no prompt, will return undefined in buildDetailedLocateParam',\n opt,\n );\n return undefined;\n }\n\n const multimodalPrompt = extractMultimodalPrompt(opt);\n if (multimodalPrompt) {\n prompt =\n typeof prompt === 'string'\n ? {\n prompt,\n ...multimodalPrompt,\n }\n : {\n ...prompt,\n ...multimodalPrompt,\n };\n }\n\n return {\n prompt,\n deepLocate,\n cacheable,\n xpath,\n };\n}\n\nexport function buildDetailedLocateParamAndRestParams(\n locatePrompt: TUserPrompt,\n opt: LocateOption | undefined,\n excludeKeys: string[] = [],\n): {\n locateParam: DetailedLocateParam | undefined;\n restParams: Record<string, any>;\n} {\n const multimodalPrompt = extractMultimodalPrompt(opt);\n const locateParam = buildDetailedLocateParam(locatePrompt, opt);\n\n // Extract all keys from opt except the ones already included in locateParam\n const restParams: Record<string, any> = {};\n\n if (typeof opt === 'object' && opt !== null) {\n // Get all keys from opt\n const allKeys = Object.keys(opt);\n\n // Keys already included in locateParam: prompt, deepLocate, cacheable, xpath\n const locateParamKeys = Object.keys(locateParam || {});\n const multimodalPromptKeys =\n typeof locateParam?.prompt === 'object' && locateParam?.prompt !== null\n ? 
Object.keys(multimodalPrompt || {})\n : [];\n\n // Extract all other keys\n for (const key of allKeys) {\n if (\n !locateParamKeys.includes(key) &&\n !multimodalPromptKeys.includes(key) &&\n !excludeKeys.includes(key) &&\n key !== 'locate'\n ) {\n restParams[key] = opt[key as keyof LocateOption];\n }\n }\n }\n\n return {\n locateParam,\n restParams,\n };\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","debugUtils","getDebug","multimodalLocateOptionFieldMap","multimodalLocateOptionKeys","extractMultimodalPrompt","opt","entries","value","undefined","interpolateEnvVars","content","lines","processedLines","line","trimmedLine","_","envVar","process","Error","parseYamlScript","filePath","processedContent","matchedDeviceId","match","deviceId","console","interpolatedContent","yaml","pathTip","assert","Array","buildDetailedLocateParam","locatePrompt","normalizedLocatePrompt","innerPrompt","rest","hasMultimodalFields","prompt","deepLocate","cacheable","xpath","multimodalPrompt","buildDetailedLocateParamAndRestParams","excludeKeys","locateParam","restParams","allKeys","locateParamKeys","multimodalPromptKeys"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,MAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACIA,MAAMI,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE5B,MAAMC,iCAAwE;IAC5E,QAAQ;IACR,yBAAyB;AAC3B;AAEA,MAAMC,6BAA6BP,OAAO,IAAI,CAC5CM;AAGF,SAASE,wBAC
PC,GAAkB;IAElB,IAAI,AAAe,YAAf,OAAOA,OAAoBA,AAAQ,SAARA,KAC7B;IAGF,MAAMC,UAAUH,2BACb,GAAG,CAAC,CAACR,MAAQ;YAACA;YAAKU,GAAG,CAACV,IAAI;SAAC,EAC5B,MAAM,CAAC,CAAC,GAAGY,MAAM,GAAKA,AAAUC,WAAVD;IAEzB,OAAOD,QAAQ,MAAM,GAChBV,OAAO,WAAW,CAACU,WACpBE;AACN;AAEO,SAASC,mBAAmBC,OAAe;IAEhD,MAAMC,QAAQD,QAAQ,KAAK,CAAC;IAC5B,MAAME,iBAAiBD,MAAM,GAAG,CAAC,CAACE;QAEhC,MAAMC,cAAcD,KAAK,SAAS;QAClC,IAAIC,YAAY,UAAU,CAAC,MAEzB,OAAOD;QAIT,OAAOA,KAAK,OAAO,CAAC,kBAAkB,CAACE,GAAGC;YACxC,MAAMT,QAAQU,QAAQ,GAAG,CAACD,OAAO,IAAI,GAAG;YACxC,IAAIT,AAAUC,WAAVD,OACF,MAAM,IAAIW,MACR,CAAC,sBAAsB,EAAEF,OAAO,IAAI,GAAG,gBAAgB,CAAC;YAG5D,OAAOT;QACT;IACF;IAEA,OAAOK,eAAe,IAAI,CAAC;AAC7B;AAEO,SAASO,gBACdT,OAAe,EACfU,QAAiB;IAEjB,IAAIC,mBAAmBX;IACvB,IAAIA,AAA+B,OAA/BA,QAAQ,OAAO,CAAC,cAAqBA,QAAQ,KAAK,CAAC,sBAAsB;QAC3E,IAAIY;QACJD,mBAAmBX,QAAQ,OAAO,CAChC,sBACA,CAACa,OAAOC;YACNF,kBAAkBE;YAClB,OAAO,CAAC,WAAW,EAAEA,SAAS,CAAC,CAAC;QAClC;QAEFC,QAAQ,IAAI,CACV,CAAC,yEAAyE,EAAEH,gBAAgB,CAAC,CAAC;IAElG;IACA,MAAMI,sBAAsBjB,mBAAmBY;IAC/C,MAAMxB,MAAM8B,2BAAAA,IAAS,CAACD,qBAAqB;QACzC,QAAQC,AAAAA,2BAAAA,WAAgB;IAC1B;IAEA,MAAMC,UAAUR,WAAW,CAAC,iBAAiB,EAAEA,UAAU,GAAG;IAC5DS,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOhC,IAAI,KAAK,EAAE,CAAC,4CAA4C,EAAE+B,SAAS;IAC1EC,IAAAA,sBAAAA,MAAAA,AAAAA,EACEC,MAAM,OAAO,CAACjC,IAAI,KAAK,GACvB,CAAC,0DAA0D,EAAEA,IAAI,KAAK,EAAE;IAE1E,OAAOA;AACT;AAEO,SAASkC,yBACdC,YAAyB,EACzB3B,GAAkB;IAElBL,WAAW,sCAAsCgC,cAAc3B;IAI/D,IAAI4B,yBAAsCD;IAC1C,IACE,AAAwB,YAAxB,OAAOA,gBACPA,AAAiB,SAAjBA,gBACA,YAAYA,cACZ;QACA,MAAM,EAAE,QAAQE,WAAW,EAAE,GAAGC,MAAM,GAAGH;QACzC,MAAMI,sBAAsBxC,OAAO,IAAI,CAACuC,MAAM,MAAM,GAAG;QACvDF,yBAAyBG,sBAAsBJ,eAAeE;IAChE;IAEA,IAAIG,SAASJ,0BAA0B5B,KAAK,UAAWA,KAAa;IACpE,IAAIiC,aAAa;IACjB,IAAIC,YAAY;IAChB,IAAIC;IAEJ,IAAI,AAAe,YAAf,OAAOnC,OAAoBA,AAAQ,SAARA,KAAc;QAK3CiC,aAAajC,IAAI,UAAU,IAAIA,IAAI,SAAS,IAAI;QAChDkC,YAAYlC,IAAI,SAAS,IAAI;QAC7BmC,QAAQnC,IAAI,KAAK;QACjB,IAAI2B,gBAAgB3B,IAAI,MAAM,IAAI2B,iBAAiB3B,IAAI,MAAM,EAC3DoB,QAAQ,IAAI,CACV,4BACAO,cACA3B,KACA;QAGJgC,SAASA,UAAUhC,IAAI,MAAM;IAC/B;IAEA,IAAI,CAACgC,QAA
Q,YACXrC,WACE,gEACAK;IAKJ,MAAMoC,mBAAmBrC,wBAAwBC;IACjD,IAAIoC,kBACFJ,SACE,AAAkB,YAAlB,OAAOA,SACH;QACEA;QACA,GAAGI,gBAAgB;IACrB,IACA;QACE,GAAGJ,MAAM;QACT,GAAGI,gBAAgB;IACrB;IAGR,OAAO;QACLJ;QACAC;QACAC;QACAC;IACF;AACF;AAEO,SAASE,sCACdV,YAAyB,EACzB3B,GAA6B,EAC7BsC,cAAwB,EAAE;IAK1B,MAAMF,mBAAmBrC,wBAAwBC;IACjD,MAAMuC,cAAcb,yBAAyBC,cAAc3B;IAG3D,MAAMwC,aAAkC,CAAC;IAEzC,IAAI,AAAe,YAAf,OAAOxC,OAAoBA,AAAQ,SAARA,KAAc;QAE3C,MAAMyC,UAAUlD,OAAO,IAAI,CAACS;QAG5B,MAAM0C,kBAAkBnD,OAAO,IAAI,CAACgD,eAAe,CAAC;QACpD,MAAMI,uBACJ,AAA+B,YAA/B,OAAOJ,aAAa,UAAuBA,aAAa,WAAW,OAC/DhD,OAAO,IAAI,CAAC6C,oBAAoB,CAAC,KACjC,EAAE;QAGR,KAAK,MAAM9C,OAAOmD,QAChB,IACE,CAACC,gBAAgB,QAAQ,CAACpD,QAC1B,CAACqD,qBAAqB,QAAQ,CAACrD,QAC/B,CAACgD,YAAY,QAAQ,CAAChD,QACtBA,AAAQ,aAARA,KAEAkD,UAAU,CAAClD,IAAI,GAAGU,GAAG,CAACV,IAA0B;IAGtD;IAEA,OAAO;QACLiD;QACAC;IACF;AACF"}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.r = (exports1)=>{
5
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
6
+ value: 'Module'
7
+ });
8
+ Object.defineProperty(exports1, '__esModule', {
9
+ value: true
10
+ });
11
+ };
12
+ })();
13
+ var __webpack_exports__ = {};
14
+ __webpack_require__.r(__webpack_exports__);
15
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
16
+ Object.defineProperty(exports, '__esModule', {
17
+ value: true
18
+ });
19
+
20
+ //# sourceMappingURL=yaml.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yaml.js","sources":["webpack/runtime/make_namespace_object"],"sourcesContent":["// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};"],"names":["__webpack_require__","Symbol","Object"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,MAAlB,OAAOC,UAA0BA,OAAO,WAAW,EACrDC,OAAO,cAAc,CAAC,UAASD,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEC,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D"}
@@ -0,0 +1,216 @@
1
+ import type { TUserPrompt } from '../ai-model/index';
2
+ import Service from '../service/index';
3
+ import { type ActionParam, type ActionReturn, type AgentAssertOpt, type AgentDescribeElementAtPointResult, type AgentOpt, type AgentWaitForOpt, type DeepThinkOption, type DeviceAction, ExecutionDump, type LocateOption, type LocateResultElement, type LocateValidatorResult, type LocatorValidatorOption, type OnTaskStartTip, ReportActionDump, type ScrollParam, type ServiceAction, type ServiceExtractOption, type ServiceExtractParam, type UIContext } from '../types';
4
+ import type { AbstractInterface } from '../device';
5
+ import type { TaskRunner } from '../task-runner';
6
+ import { ModelConfigManager } from '@midscene/shared/env';
7
+ import { TaskCache } from './task-cache';
8
+ import { TaskExecutor } from './tasks';
9
+ export type AiActOptions = {
10
+ cacheable?: boolean;
11
+ fileChooserAccept?: string | string[];
12
+ deepThink?: DeepThinkOption;
13
+ deepLocate?: boolean;
14
+ abortSignal?: AbortSignal;
15
+ };
16
+ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInterface> {
17
+ interface: InterfaceType;
18
+ service: Service;
19
+ dump: ReportActionDump;
20
+ reportFile?: string | null;
21
+ reportFileName?: string;
22
+ taskExecutor: TaskExecutor;
23
+ opts: AgentOpt;
24
+ /**
25
+ * If true, the agent will not perform any actions
26
+ */
27
+ dryMode: boolean;
28
+ onTaskStartTip?: OnTaskStartTip;
29
+ taskCache?: TaskCache;
30
+ private dumpUpdateListeners;
31
+ get onDumpUpdate(): ((dump: string, executionDump?: ExecutionDump) => void) | undefined;
32
+ set onDumpUpdate(callback: ((dump: string, executionDump?: ExecutionDump) => void) | undefined);
33
+ destroyed: boolean;
34
+ modelConfigManager: ModelConfigManager;
35
+ /**
36
+ * Frozen page context for consistent AI operations
37
+ */
38
+ private frozenUIContext?;
39
+ private get aiActContext();
40
+ /**
41
+ * Flag to track if VL model warning has been shown
42
+ */
43
+ private hasWarnedNonVLModel;
44
+ private executionDumpIndexByRunner;
45
+ private fullActionSpace;
46
+ private reportGenerator;
47
+ get page(): InterfaceType;
48
+ /**
49
+ * Ensures VL model warning is shown once when needed
50
+ */
51
+ private ensureVLModelWarning;
52
+ private resolveReplanningCycleLimit;
53
+ constructor(interfaceInstance: InterfaceType, opts?: AgentOpt);
54
+ getActionSpace(): Promise<DeviceAction[]>;
55
+ private static readonly CONTEXT_RETRY_MAX;
56
+ private static readonly CONTEXT_RETRY_DELAY_MS;
57
+ /**
58
+ * Override in subclasses to indicate which errors are transient and should
59
+ * trigger an automatic retry when building the UI context.
60
+ * Returns `false` by default (no retry).
61
+ */
62
+ protected isRetryableContextError(_error: unknown): boolean;
63
+ getUIContext(action?: ServiceAction): Promise<UIContext>;
64
+ _snapshotContext(): Promise<UIContext>;
65
+ /**
66
+ * @deprecated Use {@link setAIActContext} instead.
67
+ */
68
+ setAIActionContext(prompt: string): Promise<void>;
69
+ setAIActContext(prompt: string): Promise<void>;
70
+ resetDump(): ReportActionDump;
71
+ appendExecutionDump(execution: ExecutionDump, runner?: TaskRunner): void;
72
+ dumpDataString(opt?: {
73
+ inlineScreenshots?: boolean;
74
+ }): string;
75
+ reportHTMLString(opt?: {
76
+ inlineScreenshots?: boolean;
77
+ }): string;
78
+ private lastExecutionDump?;
79
+ writeOutActionDumps(executionDump?: ExecutionDump): void;
80
+ private getReportMeta;
81
+ private callbackOnTaskStartTip;
82
+ wrapActionInActionSpace<T extends DeviceAction>(name: string): (param: ActionParam<T>) => Promise<ActionReturn<T>>;
83
+ callActionInActionSpace<T = any>(type: string, opt?: T): Promise<any>;
84
+ aiTap(locatePrompt: TUserPrompt, opt?: LocateOption & {
85
+ fileChooserAccept?: string | string[];
86
+ }): Promise<any>;
87
+ aiRightClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
88
+ aiDoubleClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
89
+ aiHover(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
90
+ aiInput(locatePrompt: TUserPrompt, opt: LocateOption & {
91
+ value: string | number;
92
+ } & {
93
+ autoDismissKeyboard?: boolean;
94
+ } & {
95
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
96
+ }): Promise<any>;
97
+ /**
98
+ * @deprecated Use aiInput(locatePrompt, opt) instead where opt contains the value
99
+ */
100
+ aiInput(value: string | number, locatePrompt: TUserPrompt, opt?: LocateOption & {
101
+ autoDismissKeyboard?: boolean;
102
+ } & {
103
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
104
+ }): Promise<any>;
105
+ aiKeyboardPress(locatePrompt: TUserPrompt, opt: LocateOption & {
106
+ keyName: string;
107
+ }): Promise<any>;
108
+ /**
109
+ * @deprecated Use aiKeyboardPress(locatePrompt, opt) instead where opt contains the keyName
110
+ */
111
+ aiKeyboardPress(keyName: string, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
112
+ aiScroll(locatePrompt: TUserPrompt | undefined, opt: LocateOption & ScrollParam): Promise<any>;
113
+ /**
114
+ * @deprecated Use aiScroll(locatePrompt, opt) instead where opt contains the scroll parameters
115
+ */
116
+ aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>;
117
+ aiPinch(locatePrompt: TUserPrompt | undefined, opt: LocateOption & {
118
+ direction: 'in' | 'out';
119
+ distance?: number;
120
+ duration?: number;
121
+ }): Promise<any>;
122
+ aiAct(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
123
+ /**
124
+ * @deprecated Use {@link Agent.aiAct} instead.
125
+ */
126
+ aiAction(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
127
+ aiQuery<ReturnType = any>(demand: ServiceExtractParam, opt?: ServiceExtractOption): Promise<ReturnType>;
128
+ aiBoolean(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<boolean>;
129
+ aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
130
+ aiString(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
131
+ aiAsk(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
132
+ describeElementAtPoint(center: [number, number], opt?: {
133
+ verifyPrompt?: boolean;
134
+ retryLimit?: number;
135
+ deepLocate?: boolean;
136
+ } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
137
+ verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
138
+ /**
139
+ * Locate an element and return both its center point and an approximate rect.
140
+ *
141
+ * - In most locate flows, `rect` represents the matched element boundary.
142
+ * - Some models only support point grounding instead of boundary grounding.
143
+ * In those cases (for example, AutoGLM), `rect` falls back to a small 8x8
144
+ * box centered on the located point.
145
+ *
146
+ * Because `rect` may vary with the underlying model capability, avoid relying
147
+ * on it too heavily for strict boundary semantics. If you need a stable click
148
+ * target, prefer `center`.
149
+ */
150
+ aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center">>;
151
+ aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & ServiceExtractOption): Promise<{
152
+ pass: boolean;
153
+ thought: string | undefined;
154
+ message: string | undefined;
155
+ } | undefined>;
156
+ aiWaitFor(assertion: TUserPrompt, opt?: AgentWaitForOpt): Promise<void>;
157
+ ai(...args: Parameters<typeof this.aiAct>): Promise<string | undefined>;
158
+ runYaml(yamlScriptContent: string): Promise<{
159
+ result: Record<string, any>;
160
+ }>;
161
+ evaluateJavaScript(script: string): Promise<any>;
162
+ /**
163
+ * Add a dump update listener
164
+ * @param listener Listener function
165
+ * @returns A remove function that can be called to remove this listener
166
+ */
167
+ addDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): () => void;
168
+ /**
169
+ * Remove a dump update listener
170
+ * @param listener The listener function to remove
171
+ */
172
+ removeDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): void;
173
+ /**
174
+ * Clear all dump update listeners
175
+ */
176
+ clearDumpUpdateListeners(): void;
177
+ destroy(): Promise<void>;
178
+ recordToReport(title?: string, opt?: {
179
+ content: string;
180
+ }): Promise<void>;
181
+ /**
182
+ * @deprecated Use {@link Agent.recordToReport} instead.
183
+ */
184
+ logScreenshot(title?: string, opt?: {
185
+ content: string;
186
+ }): Promise<void>;
187
+ _unstableLogContent(): {
188
+ groupName: string;
189
+ groupDescription: string | undefined;
190
+ executions: ExecutionDump[];
191
+ };
192
+ /**
193
+ * Freezes the current page context to be reused in subsequent AI operations
194
+ * This avoids recalculating page context for each operation
195
+ */
196
+ freezePageContext(): Promise<void>;
197
+ /**
198
+ * Unfreezes the page context, allowing AI operations to calculate context dynamically
199
+ */
200
+ unfreezePageContext(): Promise<void>;
201
+ /**
202
+ * Process cache configuration and return normalized cache settings
203
+ */
204
+ private processCacheConfig;
205
+ private normalizeFilePaths;
206
+ private normalizeFileInput;
207
+ /**
208
+ * Manually flush cache to file
209
+ * @param options - Optional configuration
210
+ * @param options.cleanUnused - If true, removes unused cache records before flushing
211
+ */
212
+ flushCache(options?: {
213
+ cleanUnused?: boolean;
214
+ }): Promise<void>;
215
+ }
216
+ export declare const createAgent: (interfaceInstance: AbstractInterface, opts?: AgentOpt) => Agent<AbstractInterface>;
File without changes
@@ -0,0 +1,36 @@
1
+ import { type TaskExecutionError, TaskRunner } from '../task-runner';
2
+ import type { ExecutionTaskApply, ExecutionTaskProgressOptions, UIContext } from '../types';
3
+ type ExecutionSessionOptions = ExecutionTaskProgressOptions & {
4
+ tasks?: ExecutionTaskApply[];
5
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
6
+ };
7
+ /**
8
+ * Thin wrapper around {@link TaskRunner} that represents a single linear execution run.
9
+ */
10
+ export declare class ExecutionSession {
11
+ private readonly runner;
12
+ constructor(name: string, contextProvider: () => Promise<UIContext>, options?: ExecutionSessionOptions);
13
+ append(tasks: ExecutionTaskApply[] | ExecutionTaskApply, options?: {
14
+ allowWhenError?: boolean;
15
+ }): Promise<void>;
16
+ appendAndRun(tasks: ExecutionTaskApply[] | ExecutionTaskApply, options?: {
17
+ allowWhenError?: boolean;
18
+ }): Promise<{
19
+ output: any;
20
+ thought?: string;
21
+ } | undefined>;
22
+ run(options?: {
23
+ allowWhenError?: boolean;
24
+ }): Promise<{
25
+ output: any;
26
+ thought?: string;
27
+ } | undefined>;
28
+ isInErrorState(): boolean;
29
+ latestErrorTask(): import("../types").ExecutionTask | null;
30
+ appendErrorPlan(errorMsg: string): Promise<{
31
+ output: undefined;
32
+ runner: TaskRunner;
33
+ }>;
34
+ getRunner(): TaskRunner;
35
+ }
36
+ export {};
@@ -0,0 +1,9 @@
1
+ export { Agent, createAgent } from './agent';
2
+ export { commonContextParser } from './utils';
3
+ export { getReportFileName, printReportMsg, } from './utils';
4
+ export { extractInsightParam, locateParamStr, paramStr, taskTitleStr, typeStr, } from './ui-utils';
5
+ export { type LocateCache, type PlanningCache, TaskCache } from './task-cache';
6
+ export { cacheFileExt } from './task-cache';
7
+ export { TaskExecutor } from './tasks';
8
+ export type { AgentOpt } from '../types';
9
+ export type { AiActOptions } from './agent';
@@ -0,0 +1,34 @@
1
+ import type { AbstractInterface } from '../device';
2
+ import type Service from '../service';
3
+ import type { DetailedLocateParam, DeviceAction, ExecutionTaskApply, PlanningAction, PlanningLocateParam } from '../types';
4
+ import type { IModelConfig } from '@midscene/shared/env';
5
+ import type { TaskCache } from './task-cache';
6
+ export declare function locatePlanForLocate(param: string | DetailedLocateParam): PlanningAction<PlanningLocateParam>;
7
+ interface TaskBuilderDeps {
8
+ interfaceInstance: AbstractInterface;
9
+ service: Service;
10
+ taskCache?: TaskCache;
11
+ actionSpace: DeviceAction[];
12
+ waitAfterAction?: number;
13
+ }
14
+ interface BuildOptions {
15
+ cacheable?: boolean;
16
+ deepLocate?: boolean;
17
+ abortSignal?: AbortSignal;
18
+ }
19
+ export declare class TaskBuilder {
20
+ private readonly interface;
21
+ private readonly service;
22
+ private readonly taskCache?;
23
+ private readonly actionSpace;
24
+ private readonly waitAfterAction?;
25
+ constructor({ interfaceInstance, service, taskCache, actionSpace, waitAfterAction, }: TaskBuilderDeps);
26
+ build(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: BuildOptions): Promise<{
27
+ tasks: ExecutionTaskApply[];
28
+ }>;
29
+ private handleFinishedPlan;
30
+ private handleLocatePlan;
31
+ private handleActionPlan;
32
+ private createLocateTask;
33
+ }
34
+ export {};
@@ -0,0 +1,49 @@
1
+ import type { TUserPrompt } from '../ai-model';
2
+ import type { ElementCacheFeature } from '../types';
3
+ export declare const debug: import("@midscene/shared/logger").DebugFunction;
4
+ export interface PlanningCache {
5
+ type: 'plan';
6
+ prompt: string;
7
+ yamlWorkflow: string;
8
+ }
9
+ export interface LocateCache {
10
+ type: 'locate';
11
+ prompt: TUserPrompt;
12
+ cache?: ElementCacheFeature;
13
+ /** @deprecated kept for backward compatibility */
14
+ xpaths?: string[];
15
+ }
16
+ export interface MatchCacheResult<T extends PlanningCache | LocateCache> {
17
+ cacheContent: T;
18
+ cacheUsable: boolean;
19
+ updateFn: (cb: (cache: T) => void) => void;
20
+ }
21
+ export type CacheFileContent = {
22
+ midsceneVersion: string;
23
+ cacheId: string;
24
+ caches: Array<PlanningCache | LocateCache>;
25
+ };
26
+ export declare const cacheFileExt = ".cache.yaml";
27
+ export declare class TaskCache {
28
+ cacheId: string;
29
+ cacheFilePath?: string;
30
+ cache: CacheFileContent;
31
+ isCacheResultUsed: boolean;
32
+ cacheOriginalLength: number;
33
+ readOnlyMode: boolean;
34
+ writeOnlyMode: boolean;
35
+ private matchedCacheIndices;
36
+ constructor(cacheId: string, isCacheResultUsed: boolean, cacheFilePath?: string, options?: {
37
+ readOnly?: boolean;
38
+ writeOnly?: boolean;
39
+ });
40
+ matchCache(prompt: TUserPrompt, type: 'plan' | 'locate'): MatchCacheResult<PlanningCache | LocateCache> | undefined;
41
+ matchPlanCache(prompt: string): MatchCacheResult<PlanningCache> | undefined;
42
+ matchLocateCache(prompt: TUserPrompt): MatchCacheResult<LocateCache> | undefined;
43
+ appendCache(cache: PlanningCache | LocateCache): void;
44
+ loadCacheFromFile(): CacheFileContent | undefined;
45
+ flushCacheToFile(options?: {
46
+ cleanUnused?: boolean;
47
+ }): void;
48
+ updateOrAppendCacheRecord(newRecord: PlanningCache | LocateCache, cachedRecord?: MatchCacheResult<PlanningCache | LocateCache>): void;
49
+ }
@@ -0,0 +1,69 @@
1
+ import { type TMultimodalPrompt, type TUserPrompt } from '../common';
2
+ import type { AbstractInterface } from '../device';
3
+ import type Service from '../service';
4
+ import type { TaskRunner } from '../task-runner';
5
+ import { TaskExecutionError } from '../task-runner';
6
+ import type { DeepThinkOption, DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
7
+ import { type IModelConfig } from '@midscene/shared/env';
8
+ import type { TaskCache } from './task-cache';
9
+ export { locatePlanForLocate } from './task-builder';
10
+ interface ExecutionResult<OutputType = any> {
11
+ output: OutputType;
12
+ thought?: string;
13
+ runner: TaskRunner;
14
+ }
15
+ interface TaskExecutorHooks {
16
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
17
+ }
18
+ export { TaskExecutionError };
19
+ export declare class TaskExecutor {
20
+ interface: AbstractInterface;
21
+ service: Service;
22
+ taskCache?: TaskCache;
23
+ private readonly providedActionSpace;
24
+ private readonly taskBuilder;
25
+ onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
26
+ private readonly hooks?;
27
+ replanningCycleLimit?: number;
28
+ waitAfterAction?: number;
29
+ useDeviceTimestamp?: boolean;
30
+ get page(): AbstractInterface;
31
+ constructor(interfaceInstance: AbstractInterface, service: Service, opts: {
32
+ taskCache?: TaskCache;
33
+ onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
34
+ replanningCycleLimit?: number;
35
+ waitAfterAction?: number;
36
+ useDeviceTimestamp?: boolean;
37
+ hooks?: TaskExecutorHooks;
38
+ actionSpace: DeviceAction[];
39
+ });
40
+ private createExecutionSession;
41
+ private getActionSpace;
42
+ /**
43
+ * Get a readable time string using device time when configured.
44
+ * This method respects the useDeviceTimestamp configuration.
45
+ * @param format - Optional format string
46
+ * @returns A formatted time string
47
+ */
48
+ private getTimeString;
49
+ convertPlanToExecutable(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: {
50
+ cacheable?: boolean;
51
+ deepLocate?: boolean;
52
+ abortSignal?: AbortSignal;
53
+ }): Promise<{
54
+ tasks: ExecutionTaskApply[];
55
+ }>;
56
+ loadYamlFlowAsPlanning(userInstruction: string, yamlString: string): Promise<{
57
+ runner: TaskRunner;
58
+ }>;
59
+ runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
60
+ action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[], deepLocate?: boolean, abortSignal?: AbortSignal): Promise<ExecutionResult<{
61
+ yamlFlow?: MidsceneYamlFlowItem[];
62
+ output?: string;
63
+ } | undefined>>;
64
+ private runAction;
65
+ private createTypeQueryTask;
66
+ createTypeQueryExecution<T>(type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert', demand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<ExecutionResult<T>>;
67
+ waitFor(assertion: TUserPrompt, opt: PlanningActionParamWaitFor, modelConfig: IModelConfig): Promise<ExecutionResult<void>>;
68
+ }
69
+ export declare function withFileChooser<T>(interfaceInstance: AbstractInterface, fileChooserAccept: string[] | undefined, action: () => Promise<T>): Promise<T>;
@@ -0,0 +1,14 @@
1
+ import type { DetailedLocateParam, ExecutionTask, PullParam, ScrollParam } from '../types';
2
+ export declare function typeStr(task: ExecutionTask): any;
3
+ export declare function locateParamStr(locate?: DetailedLocateParam | string): string;
4
+ export declare function scrollParamStr(scrollParam?: ScrollParam): string;
5
+ export declare function pullParamStr(pullParam?: PullParam): string;
6
+ export declare function extractInsightParam(taskParam: any): {
7
+ content: string;
8
+ images?: Array<{
9
+ name: string;
10
+ url: string;
11
+ }>;
12
+ };
13
+ export declare function taskTitleStr(type: 'Tap' | 'Hover' | 'Input' | 'RightClick' | 'KeyboardPress' | 'Scroll' | 'Act' | 'Query' | 'Assert' | 'WaitFor' | 'Locate' | 'Boolean' | 'Number' | 'String', prompt: string): string;
14
+ export declare function paramStr(task: ExecutionTask): string;