@aiscene/core 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279)
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +749 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/execution-session.mjs +41 -0
  7. package/dist/es/agent/execution-session.mjs.map +1 -0
  8. package/dist/es/agent/index.mjs +6 -0
  9. package/dist/es/agent/task-builder.mjs +332 -0
  10. package/dist/es/agent/task-builder.mjs.map +1 -0
  11. package/dist/es/agent/task-cache.mjs +214 -0
  12. package/dist/es/agent/task-cache.mjs.map +1 -0
  13. package/dist/es/agent/tasks.mjs +426 -0
  14. package/dist/es/agent/tasks.mjs.map +1 -0
  15. package/dist/es/agent/ui-utils.mjs +91 -0
  16. package/dist/es/agent/ui-utils.mjs.map +1 -0
  17. package/dist/es/agent/utils.mjs +198 -0
  18. package/dist/es/agent/utils.mjs.map +1 -0
  19. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  20. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  21. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  22. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  23. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  24. package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
  25. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  26. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  27. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  28. package/dist/es/ai-model/auto-glm/util.mjs +9 -0
  29. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  30. package/dist/es/ai-model/conversation-history.mjs +195 -0
  31. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  32. package/dist/es/ai-model/index.mjs +11 -0
  33. package/dist/es/ai-model/inspect.mjs +394 -0
  34. package/dist/es/ai-model/inspect.mjs.map +1 -0
  35. package/dist/es/ai-model/llm-planning.mjs +233 -0
  36. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  37. package/dist/es/ai-model/prompt/common.mjs +7 -0
  38. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  39. package/dist/es/ai-model/prompt/describe.mjs +66 -0
  40. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  41. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  42. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  43. package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
  44. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  45. package/dist/es/ai-model/prompt/llm-planning.mjs +568 -0
  46. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  47. package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
  48. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  49. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  50. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  52. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  53. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  54. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  55. package/dist/es/ai-model/prompt/util.mjs +59 -0
  56. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  57. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  58. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  59. package/dist/es/ai-model/service-caller/codex-app-server.mjs +575 -0
  60. package/dist/es/ai-model/service-caller/codex-app-server.mjs.map +1 -0
  61. package/dist/es/ai-model/service-caller/image-detail.mjs +6 -0
  62. package/dist/es/ai-model/service-caller/image-detail.mjs.map +1 -0
  63. package/dist/es/ai-model/service-caller/index.mjs +473 -0
  64. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  65. package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
  66. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  67. package/dist/es/common.mjs +371 -0
  68. package/dist/es/common.mjs.map +1 -0
  69. package/dist/es/device/device-options.mjs +0 -0
  70. package/dist/es/device/index.mjs +341 -0
  71. package/dist/es/device/index.mjs.map +1 -0
  72. package/dist/es/dump/html-utils.mjs +284 -0
  73. package/dist/es/dump/html-utils.mjs.map +1 -0
  74. package/dist/es/dump/image-restoration.mjs +43 -0
  75. package/dist/es/dump/image-restoration.mjs.map +1 -0
  76. package/dist/es/dump/index.mjs +3 -0
  77. package/dist/es/index.mjs +15 -0
  78. package/dist/es/index.mjs.map +1 -0
  79. package/dist/es/report-generator.mjs +162 -0
  80. package/dist/es/report-generator.mjs.map +1 -0
  81. package/dist/es/report.mjs +137 -0
  82. package/dist/es/report.mjs.map +1 -0
  83. package/dist/es/screenshot-item.mjs +105 -0
  84. package/dist/es/screenshot-item.mjs.map +1 -0
  85. package/dist/es/service/index.mjs +274 -0
  86. package/dist/es/service/index.mjs.map +1 -0
  87. package/dist/es/service/utils.mjs +15 -0
  88. package/dist/es/service/utils.mjs.map +1 -0
  89. package/dist/es/skill/index.mjs +38 -0
  90. package/dist/es/skill/index.mjs.map +1 -0
  91. package/dist/es/task-runner.mjs +263 -0
  92. package/dist/es/task-runner.mjs.map +1 -0
  93. package/dist/es/task-timing.mjs +12 -0
  94. package/dist/es/task-timing.mjs.map +1 -0
  95. package/dist/es/tree.mjs +13 -0
  96. package/dist/es/tree.mjs.map +1 -0
  97. package/dist/es/types.mjs +199 -0
  98. package/dist/es/types.mjs.map +1 -0
  99. package/dist/es/utils.mjs +229 -0
  100. package/dist/es/utils.mjs.map +1 -0
  101. package/dist/es/yaml/builder.mjs +13 -0
  102. package/dist/es/yaml/builder.mjs.map +1 -0
  103. package/dist/es/yaml/index.mjs +4 -0
  104. package/dist/es/yaml/player.mjs +434 -0
  105. package/dist/es/yaml/player.mjs.map +1 -0
  106. package/dist/es/yaml/utils.mjs +102 -0
  107. package/dist/es/yaml/utils.mjs.map +1 -0
  108. package/dist/es/yaml.mjs +0 -0
  109. package/dist/lib/agent/agent.js +797 -0
  110. package/dist/lib/agent/agent.js.map +1 -0
  111. package/dist/lib/agent/common.js +5 -0
  112. package/dist/lib/agent/execution-session.js +75 -0
  113. package/dist/lib/agent/execution-session.js.map +1 -0
  114. package/dist/lib/agent/index.js +81 -0
  115. package/dist/lib/agent/index.js.map +1 -0
  116. package/dist/lib/agent/task-builder.js +369 -0
  117. package/dist/lib/agent/task-builder.js.map +1 -0
  118. package/dist/lib/agent/task-cache.js +266 -0
  119. package/dist/lib/agent/task-cache.js.map +1 -0
  120. package/dist/lib/agent/tasks.js +469 -0
  121. package/dist/lib/agent/tasks.js.map +1 -0
  122. package/dist/lib/agent/ui-utils.js +143 -0
  123. package/dist/lib/agent/ui-utils.js.map +1 -0
  124. package/dist/lib/agent/utils.js +275 -0
  125. package/dist/lib/agent/utils.js.map +1 -0
  126. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  127. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  128. package/dist/lib/ai-model/auto-glm/index.js +66 -0
  129. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  130. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  131. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  132. package/dist/lib/ai-model/auto-glm/planning.js +105 -0
  133. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  134. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  135. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  136. package/dist/lib/ai-model/auto-glm/util.js +46 -0
  137. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  138. package/dist/lib/ai-model/conversation-history.js +229 -0
  139. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  140. package/dist/lib/ai-model/index.js +125 -0
  141. package/dist/lib/ai-model/index.js.map +1 -0
  142. package/dist/lib/ai-model/inspect.js +440 -0
  143. package/dist/lib/ai-model/inspect.js.map +1 -0
  144. package/dist/lib/ai-model/llm-planning.js +270 -0
  145. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  146. package/dist/lib/ai-model/prompt/common.js +41 -0
  147. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  148. package/dist/lib/ai-model/prompt/describe.js +100 -0
  149. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  150. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  151. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  152. package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
  153. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  154. package/dist/lib/ai-model/prompt/llm-planning.js +605 -0
  155. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
  157. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  158. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  159. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  160. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  161. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  162. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  163. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  164. package/dist/lib/ai-model/prompt/util.js +105 -0
  165. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  167. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  168. package/dist/lib/ai-model/service-caller/codex-app-server.js +624 -0
  169. package/dist/lib/ai-model/service-caller/codex-app-server.js.map +1 -0
  170. package/dist/lib/ai-model/service-caller/image-detail.js +40 -0
  171. package/dist/lib/ai-model/service-caller/image-detail.js.map +1 -0
  172. package/dist/lib/ai-model/service-caller/index.js +538 -0
  173. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  174. package/dist/lib/ai-model/ui-tars-planning.js +283 -0
  175. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  176. package/dist/lib/common.js +480 -0
  177. package/dist/lib/common.js.map +1 -0
  178. package/dist/lib/device/device-options.js +20 -0
  179. package/dist/lib/device/device-options.js.map +1 -0
  180. package/dist/lib/device/index.js +468 -0
  181. package/dist/lib/device/index.js.map +1 -0
  182. package/dist/lib/dump/html-utils.js +357 -0
  183. package/dist/lib/dump/html-utils.js.map +1 -0
  184. package/dist/lib/dump/image-restoration.js +77 -0
  185. package/dist/lib/dump/image-restoration.js.map +1 -0
  186. package/dist/lib/dump/index.js +60 -0
  187. package/dist/lib/dump/index.js.map +1 -0
  188. package/dist/lib/index.js +146 -0
  189. package/dist/lib/index.js.map +1 -0
  190. package/dist/lib/report-generator.js +200 -0
  191. package/dist/lib/report-generator.js.map +1 -0
  192. package/dist/lib/report.js +171 -0
  193. package/dist/lib/report.js.map +1 -0
  194. package/dist/lib/screenshot-item.js +139 -0
  195. package/dist/lib/screenshot-item.js.map +1 -0
  196. package/dist/lib/service/index.js +308 -0
  197. package/dist/lib/service/index.js.map +1 -0
  198. package/dist/lib/service/utils.js +49 -0
  199. package/dist/lib/service/utils.js.map +1 -0
  200. package/dist/lib/skill/index.js +72 -0
  201. package/dist/lib/skill/index.js.map +1 -0
  202. package/dist/lib/task-runner.js +300 -0
  203. package/dist/lib/task-runner.js.map +1 -0
  204. package/dist/lib/task-timing.js +46 -0
  205. package/dist/lib/task-timing.js.map +1 -0
  206. package/dist/lib/tree.js +53 -0
  207. package/dist/lib/tree.js.map +1 -0
  208. package/dist/lib/types.js +288 -0
  209. package/dist/lib/types.js.map +1 -0
  210. package/dist/lib/utils.js +308 -0
  211. package/dist/lib/utils.js.map +1 -0
  212. package/dist/lib/yaml/builder.js +57 -0
  213. package/dist/lib/yaml/builder.js.map +1 -0
  214. package/dist/lib/yaml/index.js +81 -0
  215. package/dist/lib/yaml/index.js.map +1 -0
  216. package/dist/lib/yaml/player.js +468 -0
  217. package/dist/lib/yaml/player.js.map +1 -0
  218. package/dist/lib/yaml/utils.js +155 -0
  219. package/dist/lib/yaml/utils.js.map +1 -0
  220. package/dist/lib/yaml.js +20 -0
  221. package/dist/lib/yaml.js.map +1 -0
  222. package/dist/types/agent/agent.d.ts +205 -0
  223. package/dist/types/agent/common.d.ts +0 -0
  224. package/dist/types/agent/execution-session.d.ts +36 -0
  225. package/dist/types/agent/index.d.ts +10 -0
  226. package/dist/types/agent/task-builder.d.ts +34 -0
  227. package/dist/types/agent/task-cache.d.ts +49 -0
  228. package/dist/types/agent/tasks.d.ts +69 -0
  229. package/dist/types/agent/ui-utils.d.ts +14 -0
  230. package/dist/types/agent/utils.d.ts +31 -0
  231. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  232. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  233. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  234. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  235. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  236. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  237. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  238. package/dist/types/ai-model/index.d.ts +14 -0
  239. package/dist/types/ai-model/inspect.d.ts +67 -0
  240. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  241. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  242. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  243. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  244. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  245. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  246. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  247. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  248. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  249. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  250. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  251. package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
  252. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  253. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  254. package/dist/types/ai-model/service-caller/index.d.ts +49 -0
  255. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  256. package/dist/types/common.d.ts +288 -0
  257. package/dist/types/device/device-options.d.ts +142 -0
  258. package/dist/types/device/index.d.ts +2528 -0
  259. package/dist/types/dump/html-utils.d.ts +63 -0
  260. package/dist/types/dump/image-restoration.d.ts +6 -0
  261. package/dist/types/dump/index.d.ts +5 -0
  262. package/dist/types/index.d.ts +17 -0
  263. package/dist/types/report-generator.d.ts +66 -0
  264. package/dist/types/report.d.ts +22 -0
  265. package/dist/types/screenshot-item.d.ts +66 -0
  266. package/dist/types/service/index.d.ts +24 -0
  267. package/dist/types/service/utils.d.ts +2 -0
  268. package/dist/types/skill/index.d.ts +25 -0
  269. package/dist/types/task-runner.d.ts +50 -0
  270. package/dist/types/task-timing.d.ts +8 -0
  271. package/dist/types/tree.d.ts +4 -0
  272. package/dist/types/types.d.ts +669 -0
  273. package/dist/types/utils.d.ts +40 -0
  274. package/dist/types/yaml/builder.d.ts +2 -0
  275. package/dist/types/yaml/index.d.ts +4 -0
  276. package/dist/types/yaml/player.d.ts +34 -0
  277. package/dist/types/yaml/utils.d.ts +9 -0
  278. package/dist/types/yaml.d.ts +217 -0
  279. package/package.json +130 -0
@@ -0,0 +1,155 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.n = (module)=>{
5
+ var getter = module && module.__esModule ? ()=>module['default'] : ()=>module;
6
+ __webpack_require__.d(getter, {
7
+ a: getter
8
+ });
9
+ return getter;
10
+ };
11
+ })();
12
+ (()=>{
13
+ __webpack_require__.d = (exports1, definition)=>{
14
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
15
+ enumerable: true,
16
+ get: definition[key]
17
+ });
18
+ };
19
+ })();
20
+ (()=>{
21
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
22
+ })();
23
+ (()=>{
24
+ __webpack_require__.r = (exports1)=>{
25
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
26
+ value: 'Module'
27
+ });
28
+ Object.defineProperty(exports1, '__esModule', {
29
+ value: true
30
+ });
31
+ };
32
+ })();
33
+ var __webpack_exports__ = {};
34
+ __webpack_require__.r(__webpack_exports__);
35
+ __webpack_require__.d(__webpack_exports__, {
36
+ buildDetailedLocateParam: ()=>buildDetailedLocateParam,
37
+ buildDetailedLocateParamAndRestParams: ()=>buildDetailedLocateParamAndRestParams,
38
+ parseYamlScript: ()=>parseYamlScript,
39
+ interpolateEnvVars: ()=>interpolateEnvVars
40
+ });
41
+ const logger_namespaceObject = require("@midscene/shared/logger");
42
+ const utils_namespaceObject = require("@midscene/shared/utils");
43
+ const external_js_yaml_namespaceObject = require("js-yaml");
44
+ var external_js_yaml_default = /*#__PURE__*/ __webpack_require__.n(external_js_yaml_namespaceObject);
45
+ const debugUtils = (0, logger_namespaceObject.getDebug)('yaml:utils');
46
+ const multimodalLocateOptionFieldMap = {
47
+ images: true,
48
+ convertHttpImage2Base64: true
49
+ };
50
+ const multimodalLocateOptionKeys = Object.keys(multimodalLocateOptionFieldMap);
51
/**
 * Picks the multimodal locate fields (the keys listed in
 * `multimodalLocateOptionKeys`) that are explicitly set on `opt`.
 *
 * @param opt - Locate options. Anything that is not a non-null object
 *   yields `undefined`.
 * @returns An object containing only the multimodal fields whose value is
 *   not `undefined`, or `undefined` when no such field is set.
 */
function extractMultimodalPrompt(opt) {
    const isObject = null !== opt && 'object' == typeof opt;
    if (!isObject) return;
    const picked = [];
    for (const field of multimodalLocateOptionKeys) {
        const fieldValue = opt[field];
        // Only `undefined` counts as "not provided"; null/false/empty values are kept.
        if (void 0 !== fieldValue) picked.push([
            field,
            fieldValue
        ]);
    }
    if (0 === picked.length) return;
    return Object.fromEntries(picked);
}
59
/**
 * Replaces every `${NAME}` placeholder in `content` with the value of the
 * environment variable `NAME`, processing the text line by line.
 *
 * Lines whose first non-whitespace character is `#` are treated as comments
 * and returned untouched, so placeholders inside them are neither expanded
 * nor validated. Whitespace around the name inside the braces is ignored.
 *
 * @param content - Text to process; lines are separated by `\n`.
 * @returns The text with all placeholders on non-comment lines substituted.
 * @throws {Error} When a referenced environment variable is not defined.
 */
function interpolateEnvVars(content) {
    // Built once per call instead of once per line. Reusing a global regex is
    // safe here because String.prototype.replace resets lastIndex before matching.
    const placeholderPattern = /\$\{([^}]+)\}/g;
    const substitute = (_, envVar)=>{
        const name = envVar.trim();
        const value = process.env[name];
        // An empty string is a valid value; only a missing variable is an error.
        if (void 0 === value) throw new Error(`Environment variable "${name}" is not defined`);
        return value;
    };
    return content.split('\n').map((line)=>line.trimStart().startsWith('#') ? line : line.replace(placeholderPattern, substitute)).join('\n');
}
72
/**
 * Parses a YAML automation script into an object and validates that it
 * declares a `tasks` array.
 *
 * Before parsing, two text-level transformations are applied:
 *  1. If the text mentions `android`, a purely numeric `deviceId` value is
 *     wrapped in quotes so it loads as a string (a warning is printed).
 *  2. `${NAME}` placeholders are expanded via `interpolateEnvVars`.
 *
 * @param content - Raw YAML text.
 * @param filePath - Optional path of the script, used only in error messages.
 * @returns The parsed script object.
 * @throws {Error} When a referenced environment variable is missing, the
 *   YAML is invalid, or `tasks` is missing or not an array.
 */
function parseYamlScript(content, filePath) {
    let processedContent = content;
    if (content.includes('android')) {
        // Only quote when the digits are the WHOLE scalar: followed by end of
        // line/input, a flow-collection delimiter, or a trailing comment.
        // Without the lookahead, values such as `192.168.1.5:5555` or
        // `123abc` would be rewritten to `'192'.168.1.5:5555` / `'123'abc`
        // and corrupt the document.
        const numericDeviceIdPattern = /deviceId:\s*(\d+)(?=[ \t]*(?:[,}\]\r\n]|$)|[ \t]+#)/g;
        let matchedDeviceId;
        const quotedContent = content.replace(numericDeviceIdPattern, (_, deviceId)=>{
            matchedDeviceId = deviceId;
            return `deviceId: '${deviceId}'`;
        });
        if (void 0 !== matchedDeviceId) {
            processedContent = quotedContent;
            console.warn(`please use string-style deviceId in yaml script, for example: deviceId: "${matchedDeviceId}"`);
        }
    }
    const interpolatedContent = interpolateEnvVars(processedContent);
    const obj = external_js_yaml_default().load(interpolatedContent, {
        schema: external_js_yaml_default().JSON_SCHEMA
    });
    const pathTip = filePath ? `, failed to load ${filePath}` : '';
    // `load` yields undefined for an empty document and a primitive for a
    // scalar one; optional chaining turns both into the intended assertion
    // failure instead of a raw TypeError.
    (0, utils_namespaceObject.assert)(obj?.tasks, `property "tasks" is required in yaml script ${pathTip}`);
    (0, utils_namespaceObject.assert)(Array.isArray(obj.tasks), `property "tasks" must be an array in yaml script, but got ${obj.tasks}`);
    return obj;
}
91
/**
 * Builds the detailed locate parameter (`prompt`, `deepLocate`, `cacheable`,
 * `xpath`) from a locate prompt and an optional options object.
 *
 * Prompt resolution order: `locatePrompt`, then `opt.prompt`, then
 * `opt.locate`. An object-form prompt that carries nothing but a `prompt`
 * key is unwrapped to its inner value to avoid `{ prompt: { prompt } }`
 * nesting. Multimodal fields found on `opt` are merged into the prompt,
 * turning a string prompt into an object.
 *
 * @param locatePrompt - Prompt as a string or an object with a `prompt` key.
 * @param opt - Optional locate options. `deepThink` is accepted as a
 *   fallback for `deepLocate`.
 * @returns The detailed locate parameter, or `undefined` when no prompt can
 *   be resolved.
 */
function buildDetailedLocateParam(locatePrompt, opt) {
    debugUtils('will call buildDetailedLocateParam', locatePrompt, opt);
    let normalizedLocatePrompt = locatePrompt;
    if ('object' == typeof locatePrompt && null !== locatePrompt && 'prompt' in locatePrompt) {
        const { prompt: innerPrompt, ...rest } = locatePrompt;
        const hasMultimodalFields = Object.keys(rest).length > 0;
        normalizedLocatePrompt = hasMultimodalFields ? locatePrompt : innerPrompt;
    }
    // `opt?.prompt` already covers the options-object fallback, so no second
    // fallback is needed further down.
    let prompt = normalizedLocatePrompt || opt?.prompt || opt?.locate;
    let deepLocate = false;
    let cacheable = true;
    let xpath;
    if ('object' == typeof opt && null !== opt) {
        deepLocate = opt.deepLocate ?? opt.deepThink ?? false;
        cacheable = opt.cacheable ?? true;
        xpath = opt.xpath;
        // Compare the prompt texts, not the wrappers: `{ prompt: 'x' }` and
        // `'x'` describe the same prompt and must not be reported as a conflict.
        const promptText = (value)=>'object' == typeof value && null !== value && 'prompt' in value ? value.prompt : value;
        const locateText = promptText(locatePrompt);
        const optionText = promptText(opt.prompt);
        if (locateText && optionText && locateText !== optionText) console.warn('conflict prompt for item', locatePrompt, opt, 'maybe you put the prompt in the wrong place');
    }
    if (!prompt) {
        debugUtils('no prompt, will return undefined in buildDetailedLocateParam', opt);
        return;
    }
    const multimodalPrompt = extractMultimodalPrompt(opt);
    if (multimodalPrompt) {
        // Fields from `opt` are spread last, so they win over same-named
        // fields already present on an object-form prompt.
        prompt = 'string' == typeof prompt ? {
            prompt,
            ...multimodalPrompt
        } : {
            ...prompt,
            ...multimodalPrompt
        };
    }
    return {
        prompt,
        deepLocate,
        cacheable,
        xpath
    };
}
126
/**
 * Splits `opt` into the detailed locate parameter and the remaining,
 * non-locate options.
 *
 * A key of `opt` is left out of `restParams` when it is a key of the built
 * locate parameter, a multimodal field that was merged into an object-form
 * prompt, listed in `excludeKeys`, or the literal key `locate`.
 *
 * @param locatePrompt - Prompt forwarded to `buildDetailedLocateParam`.
 * @param opt - Options object to split; non-objects produce empty `restParams`.
 * @param excludeKeys - Additional keys to keep out of `restParams`.
 * @returns `{ locateParam, restParams }`.
 */
function buildDetailedLocateParamAndRestParams(locatePrompt, opt, excludeKeys = []) {
    const multimodalPrompt = extractMultimodalPrompt(opt);
    const locateParam = buildDetailedLocateParam(locatePrompt, opt);
    const restParams = {};
    const hasOptions = null !== opt && 'object' == typeof opt;
    if (hasOptions) {
        const promptIsObject = 'object' == typeof locateParam?.prompt && locateParam?.prompt !== null;
        // Multimodal keys only count as consumed when they were actually
        // merged into an object-form prompt.
        const mergedMultimodalKeys = promptIsObject ? Object.keys(multimodalPrompt || {}) : [];
        const handledKeys = Object.keys(locateParam || {}).concat(mergedMultimodalKeys);
        const isConsumed = (name)=>handledKeys.includes(name) || excludeKeys.includes(name) || 'locate' === name;
        Object.keys(opt).filter((name)=>!isConsumed(name)).forEach((name)=>{
            restParams[name] = opt[name];
        });
    }
    return {
        locateParam,
        restParams
    };
}
141
+ exports.buildDetailedLocateParam = __webpack_exports__.buildDetailedLocateParam;
142
+ exports.buildDetailedLocateParamAndRestParams = __webpack_exports__.buildDetailedLocateParamAndRestParams;
143
+ exports.interpolateEnvVars = __webpack_exports__.interpolateEnvVars;
144
+ exports.parseYamlScript = __webpack_exports__.parseYamlScript;
145
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
146
+ "buildDetailedLocateParam",
147
+ "buildDetailedLocateParamAndRestParams",
148
+ "interpolateEnvVars",
149
+ "parseYamlScript"
150
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
151
+ Object.defineProperty(exports, '__esModule', {
152
+ value: true
153
+ });
154
+
155
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yaml/utils.js","sources":["webpack/runtime/compat_get_default_export","webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/yaml/utils.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type {\n DetailedLocateParam,\n LocateOption,\n MidsceneYamlScript,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport yaml from 'js-yaml';\n\nconst debugUtils = getDebug('yaml:utils');\n\nconst multimodalLocateOptionFieldMap: Record<keyof TMultimodalPrompt, true> = {\n images: true,\n convertHttpImage2Base64: true,\n};\n\nconst multimodalLocateOptionKeys = Object.keys(\n multimodalLocateOptionFieldMap,\n) as Array<keyof TMultimodalPrompt>;\n\nfunction extractMultimodalPrompt(\n opt?: LocateOption,\n): Partial<TMultimodalPrompt> | undefined {\n if (typeof opt !== 'object' || opt === null) {\n return undefined;\n }\n\n const entries = 
multimodalLocateOptionKeys\n .map((key) => [key, opt[key]] as const)\n .filter(([, value]) => value !== undefined);\n\n return entries.length\n ? (Object.fromEntries(entries) as Partial<TMultimodalPrompt>)\n : undefined;\n}\n\nexport function interpolateEnvVars(content: string): string {\n // Process line by line to skip commented lines\n const lines = content.split('\\n');\n const processedLines = lines.map((line) => {\n // Check if the line is a YAML comment (starts with # after optional whitespace)\n const trimmedLine = line.trimStart();\n if (trimmedLine.startsWith('#')) {\n // Skip interpolation for comment lines\n return line;\n }\n\n // Process environment variables for non-comment lines\n return line.replace(/\\$\\{([^}]+)\\}/g, (_, envVar) => {\n const value = process.env[envVar.trim()];\n if (value === undefined) {\n throw new Error(\n `Environment variable \"${envVar.trim()}\" is not defined`,\n );\n }\n return value;\n });\n });\n\n return processedLines.join('\\n');\n}\n\nexport function parseYamlScript(\n content: string,\n filePath?: string,\n): MidsceneYamlScript {\n let processedContent = content;\n if (content.indexOf('android') !== -1 && content.match(/deviceId:\\s*(\\d+)/)) {\n let matchedDeviceId;\n processedContent = content.replace(\n /deviceId:\\s*(\\d+)/g,\n (match, deviceId) => {\n matchedDeviceId = deviceId;\n return `deviceId: '${deviceId}'`;\n },\n );\n console.warn(\n `please use string-style deviceId in yaml script, for example: deviceId: \"${matchedDeviceId}\"`,\n );\n }\n const interpolatedContent = interpolateEnvVars(processedContent);\n const obj = yaml.load(interpolatedContent, {\n schema: yaml.JSON_SCHEMA,\n }) as MidsceneYamlScript;\n\n const pathTip = filePath ? 
`, failed to load ${filePath}` : '';\n assert(obj.tasks, `property \"tasks\" is required in yaml script ${pathTip}`);\n assert(\n Array.isArray(obj.tasks),\n `property \"tasks\" must be an array in yaml script, but got ${obj.tasks}`,\n );\n return obj;\n}\n\nexport function buildDetailedLocateParam(\n locatePrompt: TUserPrompt,\n opt?: LocateOption,\n): DetailedLocateParam | undefined {\n debugUtils('will call buildDetailedLocateParam', locatePrompt, opt);\n // Normalize object-form TUserPrompt: when the object only contains a\n // `prompt` string (no multimodal fields like `images`), unwrap it to\n // avoid double nesting like { prompt: { prompt: '...' } }.\n let normalizedLocatePrompt: TUserPrompt = locatePrompt;\n if (\n typeof locatePrompt === 'object' &&\n locatePrompt !== null &&\n 'prompt' in locatePrompt\n ) {\n const { prompt: innerPrompt, ...rest } = locatePrompt;\n const hasMultimodalFields = Object.keys(rest).length > 0;\n normalizedLocatePrompt = hasMultimodalFields ? locatePrompt : innerPrompt;\n }\n\n let prompt = normalizedLocatePrompt || opt?.prompt || (opt as any)?.locate; // as a shortcut\n let deepLocate = false;\n let cacheable = true;\n let xpath = undefined;\n\n if (typeof opt === 'object' && opt !== null) {\n // Backward-compatible: accept `deepThink` as a deprecated alias for `deepLocate`.\n // All downstream code works on `deepLocate` only; the compatibility resolution\n // is intentionally kept here at the entry point so it does not bleed through\n // the rest of the call stack.\n deepLocate = opt.deepLocate ?? opt.deepThink ?? false;\n cacheable = opt.cacheable ?? 
true;\n xpath = opt.xpath;\n if (locatePrompt && opt.prompt && locatePrompt !== opt.prompt) {\n console.warn(\n 'conflict prompt for item',\n locatePrompt,\n opt,\n 'maybe you put the prompt in the wrong place',\n );\n }\n prompt = prompt || opt.prompt;\n }\n\n if (!prompt) {\n debugUtils(\n 'no prompt, will return undefined in buildDetailedLocateParam',\n opt,\n );\n return undefined;\n }\n\n const multimodalPrompt = extractMultimodalPrompt(opt);\n if (multimodalPrompt) {\n prompt =\n typeof prompt === 'string'\n ? {\n prompt,\n ...multimodalPrompt,\n }\n : {\n ...prompt,\n ...multimodalPrompt,\n };\n }\n\n return {\n prompt,\n deepLocate,\n cacheable,\n xpath,\n };\n}\n\nexport function buildDetailedLocateParamAndRestParams(\n locatePrompt: TUserPrompt,\n opt: LocateOption | undefined,\n excludeKeys: string[] = [],\n): {\n locateParam: DetailedLocateParam | undefined;\n restParams: Record<string, any>;\n} {\n const multimodalPrompt = extractMultimodalPrompt(opt);\n const locateParam = buildDetailedLocateParam(locatePrompt, opt);\n\n // Extract all keys from opt except the ones already included in locateParam\n const restParams: Record<string, any> = {};\n\n if (typeof opt === 'object' && opt !== null) {\n // Get all keys from opt\n const allKeys = Object.keys(opt);\n\n // Keys already included in locateParam: prompt, deepLocate, cacheable, xpath\n const locateParamKeys = Object.keys(locateParam || {});\n const multimodalPromptKeys =\n typeof locateParam?.prompt === 'object' && locateParam?.prompt !== null\n ? 
Object.keys(multimodalPrompt || {})\n : [];\n\n // Extract all other keys\n for (const key of allKeys) {\n if (\n !locateParamKeys.includes(key) &&\n !multimodalPromptKeys.includes(key) &&\n !excludeKeys.includes(key) &&\n key !== 'locate'\n ) {\n restParams[key] = opt[key as keyof LocateOption];\n }\n }\n }\n\n return {\n locateParam,\n restParams,\n };\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","debugUtils","getDebug","multimodalLocateOptionFieldMap","multimodalLocateOptionKeys","extractMultimodalPrompt","opt","entries","value","undefined","interpolateEnvVars","content","lines","processedLines","line","trimmedLine","_","envVar","process","Error","parseYamlScript","filePath","processedContent","matchedDeviceId","match","deviceId","console","interpolatedContent","yaml","pathTip","assert","Array","buildDetailedLocateParam","locatePrompt","normalizedLocatePrompt","innerPrompt","rest","hasMultimodalFields","prompt","deepLocate","cacheable","xpath","multimodalPrompt","buildDetailedLocateParamAndRestParams","excludeKeys","locateParam","restParams","allKeys","locateParamKeys","multimodalPromptKeys"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACIA,MAAMI,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE5B,MAAMC,iCAAwE;IAC5E,QAAQ;IACR,yBAAyB;AAC3B;AAEA,MAAMC,6BAA6BP,OAAO,IAAI,CAC5CM;AAGF,SAASE,wBAC
PC,GAAkB;IAElB,IAAI,AAAe,YAAf,OAAOA,OAAoBA,AAAQ,SAARA,KAC7B;IAGF,MAAMC,UAAUH,2BACb,GAAG,CAAC,CAACR,MAAQ;YAACA;YAAKU,GAAG,CAACV,IAAI;SAAC,EAC5B,MAAM,CAAC,CAAC,GAAGY,MAAM,GAAKA,AAAUC,WAAVD;IAEzB,OAAOD,QAAQ,MAAM,GAChBV,OAAO,WAAW,CAACU,WACpBE;AACN;AAEO,SAASC,mBAAmBC,OAAe;IAEhD,MAAMC,QAAQD,QAAQ,KAAK,CAAC;IAC5B,MAAME,iBAAiBD,MAAM,GAAG,CAAC,CAACE;QAEhC,MAAMC,cAAcD,KAAK,SAAS;QAClC,IAAIC,YAAY,UAAU,CAAC,MAEzB,OAAOD;QAIT,OAAOA,KAAK,OAAO,CAAC,kBAAkB,CAACE,GAAGC;YACxC,MAAMT,QAAQU,QAAQ,GAAG,CAACD,OAAO,IAAI,GAAG;YACxC,IAAIT,AAAUC,WAAVD,OACF,MAAM,IAAIW,MACR,CAAC,sBAAsB,EAAEF,OAAO,IAAI,GAAG,gBAAgB,CAAC;YAG5D,OAAOT;QACT;IACF;IAEA,OAAOK,eAAe,IAAI,CAAC;AAC7B;AAEO,SAASO,gBACdT,OAAe,EACfU,QAAiB;IAEjB,IAAIC,mBAAmBX;IACvB,IAAIA,AAA+B,OAA/BA,QAAQ,OAAO,CAAC,cAAqBA,QAAQ,KAAK,CAAC,sBAAsB;QAC3E,IAAIY;QACJD,mBAAmBX,QAAQ,OAAO,CAChC,sBACA,CAACa,OAAOC;YACNF,kBAAkBE;YAClB,OAAO,CAAC,WAAW,EAAEA,SAAS,CAAC,CAAC;QAClC;QAEFC,QAAQ,IAAI,CACV,CAAC,yEAAyE,EAAEH,gBAAgB,CAAC,CAAC;IAElG;IACA,MAAMI,sBAAsBjB,mBAAmBY;IAC/C,MAAMxB,MAAM8B,2BAAAA,IAAS,CAACD,qBAAqB;QACzC,QAAQC,AAAAA,2BAAAA,WAAgB;IAC1B;IAEA,MAAMC,UAAUR,WAAW,CAAC,iBAAiB,EAAEA,UAAU,GAAG;IAC5DS,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOhC,IAAI,KAAK,EAAE,CAAC,4CAA4C,EAAE+B,SAAS;IAC1EC,IAAAA,sBAAAA,MAAAA,AAAAA,EACEC,MAAM,OAAO,CAACjC,IAAI,KAAK,GACvB,CAAC,0DAA0D,EAAEA,IAAI,KAAK,EAAE;IAE1E,OAAOA;AACT;AAEO,SAASkC,yBACdC,YAAyB,EACzB3B,GAAkB;IAElBL,WAAW,sCAAsCgC,cAAc3B;IAI/D,IAAI4B,yBAAsCD;IAC1C,IACE,AAAwB,YAAxB,OAAOA,gBACPA,AAAiB,SAAjBA,gBACA,YAAYA,cACZ;QACA,MAAM,EAAE,QAAQE,WAAW,EAAE,GAAGC,MAAM,GAAGH;QACzC,MAAMI,sBAAsBxC,OAAO,IAAI,CAACuC,MAAM,MAAM,GAAG;QACvDF,yBAAyBG,sBAAsBJ,eAAeE;IAChE;IAEA,IAAIG,SAASJ,0BAA0B5B,KAAK,UAAWA,KAAa;IACpE,IAAIiC,aAAa;IACjB,IAAIC,YAAY;IAChB,IAAIC;IAEJ,IAAI,AAAe,YAAf,OAAOnC,OAAoBA,AAAQ,SAARA,KAAc;QAK3CiC,aAAajC,IAAI,UAAU,IAAIA,IAAI,SAAS,IAAI;QAChDkC,YAAYlC,IAAI,SAAS,IAAI;QAC7BmC,QAAQnC,IAAI,KAAK;QACjB,IAAI2B,gBAAgB3B,IAAI,MAAM,IAAI2B,iBAAiB3B,IAAI,MAAM,EAC3DoB,QAAQ,IAAI,CACV,4BACAO,cACA3B,KACA;QAGJgC,SAASA,UAAUhC,IAAI,MAAM;IAC/B;IAEA,IAAI,CAACgC,QAA
Q,YACXrC,WACE,gEACAK;IAKJ,MAAMoC,mBAAmBrC,wBAAwBC;IACjD,IAAIoC,kBACFJ,SACE,AAAkB,YAAlB,OAAOA,SACH;QACEA;QACA,GAAGI,gBAAgB;IACrB,IACA;QACE,GAAGJ,MAAM;QACT,GAAGI,gBAAgB;IACrB;IAGR,OAAO;QACLJ;QACAC;QACAC;QACAC;IACF;AACF;AAEO,SAASE,sCACdV,YAAyB,EACzB3B,GAA6B,EAC7BsC,cAAwB,EAAE;IAK1B,MAAMF,mBAAmBrC,wBAAwBC;IACjD,MAAMuC,cAAcb,yBAAyBC,cAAc3B;IAG3D,MAAMwC,aAAkC,CAAC;IAEzC,IAAI,AAAe,YAAf,OAAOxC,OAAoBA,AAAQ,SAARA,KAAc;QAE3C,MAAMyC,UAAUlD,OAAO,IAAI,CAACS;QAG5B,MAAM0C,kBAAkBnD,OAAO,IAAI,CAACgD,eAAe,CAAC;QACpD,MAAMI,uBACJ,AAA+B,YAA/B,OAAOJ,aAAa,UAAuBA,aAAa,WAAW,OAC/DhD,OAAO,IAAI,CAAC6C,oBAAoB,CAAC,KACjC,EAAE;QAGR,KAAK,MAAM9C,OAAOmD,QAChB,IACE,CAACC,gBAAgB,QAAQ,CAACpD,QAC1B,CAACqD,qBAAqB,QAAQ,CAACrD,QAC/B,CAACgD,YAAY,QAAQ,CAAChD,QACtBA,AAAQ,aAARA,KAEAkD,UAAU,CAAClD,IAAI,GAAGU,GAAG,CAACV,IAA0B;IAGtD;IAEA,OAAO;QACLiD;QACAC;IACF;AACF"}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+ var __webpack_require__ = {}; // holder object for the bundler runtime helper defined in the IIFE below
3
+ (()=>{
4
+ __webpack_require__.r = (exports1)=>{ // helper: tags the given exports object as an ES module namespace
5
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
6
+ value: 'Module'
7
+ }); // only runs when the host provides Symbol.toStringTag
8
+ Object.defineProperty(exports1, '__esModule', {
9
+ value: true
10
+ }); // always runs; defineProperty without `enumerable` creates a non-enumerable property
11
+ };
12
+ })();
13
+ var __webpack_exports__ = {}; // no export bindings are attached to this object anywhere in this file
14
+ __webpack_require__.r(__webpack_exports__);
15
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i]; // for-in sees only enumerable keys, so the markers set by `.r` are not copied
16
+ Object.defineProperty(exports, '__esModule', {
17
+ value: true
18
+ }); // sets the `__esModule` marker directly on the CommonJS `exports` object
19
+
20
+ //# sourceMappingURL=yaml.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yaml.js","sources":["webpack/runtime/make_namespace_object"],"sourcesContent":["// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};"],"names":["__webpack_require__","Symbol","Object"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOC,UAA0BA,OAAO,WAAW,EACrDC,OAAO,cAAc,CAAC,UAASD,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEC,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D"}
@@ -0,0 +1,205 @@
1
+ import type { TUserPrompt } from '../ai-model/index';
2
+ import Service from '../service/index';
3
+ import { type ActionParam, type ActionReturn, type AgentAssertOpt, type AgentDescribeElementAtPointResult, type AgentOpt, type AgentWaitForOpt, type DeepThinkOption, type DeviceAction, ExecutionDump, GroupedActionDump, type LocateOption, type LocateResultElement, type LocateValidatorResult, type LocatorValidatorOption, type OnTaskStartTip, type ScrollParam, type ServiceAction, type ServiceExtractOption, type ServiceExtractParam, type UIContext } from '../types';
4
+ export type TestStatus = 'passed' | 'failed' | 'timedOut' | 'skipped' | 'interrupted'; // string-literal union of five status names; not referenced again in this declaration file
5
+ import type { AbstractInterface } from '../device';
6
+ import type { TaskRunner } from '../task-runner';
7
+ import { ModelConfigManager } from '@midscene/shared/env';
8
+ import { TaskCache } from './task-cache';
9
+ import { TaskExecutor } from './tasks';
10
+ export type AiActOptions = { // options bag taken by Agent.aiAct and Agent.aiAction; every field is optional
11
+ cacheable?: boolean;
12
+ fileChooserAccept?: string | string[]; // either one string or a list of strings
13
+ deepThink?: DeepThinkOption;
14
+ deepLocate?: boolean;
15
+ abortSignal?: AbortSignal; // NOTE(review): how cancellation is honoured is not visible in this declaration
16
+ };
17
+ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInterface> { // generic over the interface type; the parameter defaults to AbstractInterface
18
+ interface: InterfaceType; // same type as the `page` getter declared further down
19
+ service: Service;
20
+ dump: GroupedActionDump;
21
+ reportFile?: string | null; // may be absent, null, or a string
22
+ reportFileName?: string;
23
+ taskExecutor: TaskExecutor;
24
+ opts: AgentOpt;
25
+ /**
26
+ * If true, the agent will not perform any actions
27
+ */
28
+ dryMode: boolean;
29
+ onTaskStartTip?: OnTaskStartTip;
30
+ taskCache?: TaskCache;
31
+ private dumpUpdateListeners; // private members carry no type in a declaration file; presumably backs the add/remove/clear listener methods below — TODO confirm
32
+ get onDumpUpdate(): ((dump: string, executionDump?: ExecutionDump) => void) | undefined; // accessor pair: reads a single callback or undefined
33
+ set onDumpUpdate(callback: ((dump: string, executionDump?: ExecutionDump) => void) | undefined); // accepts the same callback shape, or undefined
34
+ destroyed: boolean;
35
+ modelConfigManager: ModelConfigManager;
36
+ /**
37
+ * Frozen page context for consistent AI operations
38
+ */
39
+ private frozenUIContext?;
40
+ private get aiActContext();
41
+ /**
42
+ * Flag to track if VL model warning has been shown
43
+ */
44
+ private hasWarnedNonVLModel;
45
+ private executionDumpIndexByRunner;
46
+ private fullActionSpace;
47
+ private reportGenerator;
48
+ get page(): InterfaceType; // read-only accessor typed identically to the `interface` field
49
+ /**
50
+ * Ensures VL model warning is shown once when needed
51
+ */
52
+ private ensureVLModelWarning;
53
+ private resolveReplanningCycleLimit;
54
+ constructor(interfaceInstance: InterfaceType, opts?: AgentOpt); // `opts` may be omitted
55
+ getActionSpace(): Promise<DeviceAction[]>;
56
+ private static readonly CONTEXT_RETRY_MAX; // value is not part of the declaration output
57
+ private static readonly CONTEXT_RETRY_DELAY_MS; // value is not part of the declaration output
58
+ /**
59
+ * Override in subclasses to indicate which errors are transient and should
60
+ * trigger an automatic retry when building the UI context.
61
+ * Returns `false` by default (no retry).
62
+ */
63
+ protected isRetryableContextError(_error: unknown): boolean;
64
+ getUIContext(action?: ServiceAction): Promise<UIContext>;
65
+ _snapshotContext(): Promise<UIContext>; // public in the typings despite the underscore prefix
66
+ /**
67
+ * @deprecated Use {@link setAIActContext} instead.
68
+ */
69
+ setAIActionContext(prompt: string): Promise<void>;
70
+ setAIActContext(prompt: string): Promise<void>;
71
+ resetDump(): GroupedActionDump;
72
+ appendExecutionDump(execution: ExecutionDump, runner?: TaskRunner): void;
73
+ dumpDataString(opt?: {
74
+ inlineScreenshots?: boolean;
75
+ }): string;
76
+ reportHTMLString(opt?: { // takes the same optional `inlineScreenshots` flag as dumpDataString
77
+ inlineScreenshots?: boolean;
78
+ }): string;
79
+ private lastExecutionDump?;
80
+ writeOutActionDumps(executionDump?: ExecutionDump): void;
81
+ private getGroupMeta;
82
+ private callbackOnTaskStartTip;
83
+ wrapActionInActionSpace<T extends DeviceAction>(name: string): (param: ActionParam<T>) => Promise<ActionReturn<T>>; // returns a function; param and result types are derived from T
84
+ callActionInActionSpace<T = any>(type: string, opt?: T): Promise<any>; // untyped result (Promise<any>)
85
+ aiTap(locatePrompt: TUserPrompt, opt?: LocateOption & {
86
+ fileChooserAccept?: string | string[];
87
+ }): Promise<any>;
88
+ aiRightClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
89
+ aiDoubleClick(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
90
+ aiHover(locatePrompt: TUserPrompt, opt?: LocateOption): Promise<any>;
91
+ aiInput(locatePrompt: TUserPrompt, opt: LocateOption & { // preferred overload: `opt` is required and carries the value
92
+ value: string | number;
93
+ } & {
94
+ autoDismissKeyboard?: boolean;
95
+ } & {
96
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
97
+ }): Promise<any>;
98
+ /**
99
+ * @deprecated Use aiInput(locatePrompt, opt) instead where opt contains the value
100
+ */
101
+ aiInput(value: string | number, locatePrompt: TUserPrompt, opt?: LocateOption & { // legacy overload: value is the first positional argument
102
+ autoDismissKeyboard?: boolean;
103
+ } & {
104
+ mode?: 'replace' | 'clear' | 'typeOnly' | 'append';
105
+ }): Promise<any>;
106
+ aiKeyboardPress(locatePrompt: TUserPrompt, opt: LocateOption & { // preferred overload: `opt` is required and carries keyName
107
+ keyName: string;
108
+ }): Promise<any>;
109
+ /**
110
+ * @deprecated Use aiKeyboardPress(locatePrompt, opt) instead where opt contains the keyName
111
+ */
112
+ aiKeyboardPress(keyName: string, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>; // legacy overload: keyName first, locatePrompt optional
113
+ aiScroll(locatePrompt: TUserPrompt | undefined, opt: LocateOption & ScrollParam): Promise<any>; // preferred overload: locatePrompt may be passed as undefined
114
+ /**
115
+ * @deprecated Use aiScroll(locatePrompt, opt) instead where opt contains the scroll parameters
116
+ */
117
+ aiScroll(scrollParam: ScrollParam, locatePrompt?: TUserPrompt, opt?: LocateOption): Promise<any>; // legacy overload: scroll parameters first
118
+ aiPinch(locatePrompt: TUserPrompt | undefined, opt: LocateOption & { // `direction` is required; `distance` and `duration` are optional
119
+ direction: 'in' | 'out';
120
+ distance?: number;
121
+ duration?: number;
122
+ }): Promise<any>;
123
+ aiAct(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>;
124
+ /**
125
+ * @deprecated Use {@link Agent.aiAct} instead.
126
+ */
127
+ aiAction(taskPrompt: string, opt?: AiActOptions): Promise<string | undefined>; // identical signature to aiAct
128
+ aiQuery<ReturnType = any>(demand: ServiceExtractParam, opt?: ServiceExtractOption): Promise<ReturnType>; // result type is chosen by the caller; defaults to any
129
+ aiBoolean(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<boolean>;
130
+ aiNumber(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<number>;
131
+ aiString(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
132
+ aiAsk(prompt: TUserPrompt, opt?: ServiceExtractOption): Promise<string>;
133
+ describeElementAtPoint(center: [number, number], opt?: { // `center` is a two-number tuple
134
+ verifyPrompt?: boolean;
135
+ retryLimit?: number;
136
+ deepLocate?: boolean;
137
+ } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
138
+ verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>; // `locateOpt` is positional and must be passed, even if undefined
139
+ aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center">>; // exposes only `rect` and `center` of the located element
140
+ aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & ServiceExtractOption): Promise<{ // resolves to a result object or undefined
141
+ pass: boolean;
142
+ thought: string | undefined;
143
+ message: string | undefined;
144
+ } | undefined>;
145
+ aiWaitFor(assertion: TUserPrompt, opt?: AgentWaitForOpt): Promise<void>;
146
+ ai(...args: Parameters<typeof this.aiAct>): Promise<string | undefined>; // parameter list and return type mirror aiAct
147
+ runYaml(yamlScriptContent: string): Promise<{
148
+ result: Record<string, any>;
149
+ }>;
150
+ evaluateJavaScript(script: string): Promise<any>;
151
+ /**
152
+ * Add a dump update listener
153
+ * @param listener Listener function
154
+ * @returns A remove function that can be called to remove this listener
155
+ */
156
+ addDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): () => void;
157
+ /**
158
+ * Remove a dump update listener
159
+ * @param listener The listener function to remove
160
+ */
161
+ removeDumpUpdateListener(listener: (dump: string, executionDump?: ExecutionDump) => void): void;
162
+ /**
163
+ * Clear all dump update listeners
164
+ */
165
+ clearDumpUpdateListeners(): void;
166
+ destroy(): Promise<void>;
167
+ recordToReport(title?: string, opt?: { // when `opt` is supplied, `content` is required
168
+ content: string;
169
+ }): Promise<void>;
170
+ /**
171
+ * @deprecated Use {@link Agent.recordToReport} instead.
172
+ */
173
+ logScreenshot(title?: string, opt?: { // identical signature to recordToReport
174
+ content: string;
175
+ }): Promise<void>;
176
+ _unstableLogContent(): { // public in the typings despite the underscore prefix
177
+ groupName: string;
178
+ groupDescription: string | undefined;
179
+ executions: ExecutionDump[];
180
+ };
181
+ /**
182
+ * Freezes the current page context to be reused in subsequent AI operations
183
+ * This avoids recalculating page context for each operation
184
+ */
185
+ freezePageContext(): Promise<void>;
186
+ /**
187
+ * Unfreezes the page context, allowing AI operations to calculate context dynamically
188
+ */
189
+ unfreezePageContext(): Promise<void>;
190
+ /**
191
+ * Process cache configuration and return normalized cache settings
192
+ */
193
+ private processCacheConfig;
194
+ private normalizeFilePaths;
195
+ private normalizeFileInput;
196
+ /**
197
+ * Manually flush cache to file
198
+ * @param options - Optional configuration
199
+ * @param options.cleanUnused - If true, removes unused cache records before flushing
200
+ */
201
+ flushCache(options?: {
202
+ cleanUnused?: boolean;
203
+ }): Promise<void>;
204
+ }
205
+ export declare const createAgent: (interfaceInstance: AbstractInterface, opts?: AgentOpt) => Agent<AbstractInterface>; // function-typed constant; takes the same two arguments as the Agent constructor and is typed to the base AbstractInterface
File without changes
@@ -0,0 +1,36 @@
1
+ import { type TaskExecutionError, TaskRunner } from '../task-runner';
2
+ import type { ExecutionTaskApply, ExecutionTaskProgressOptions, UIContext } from '../types';
3
+ type ExecutionSessionOptions = ExecutionTaskProgressOptions & { // module-private alias: progress options plus two optional fields
4
+ tasks?: ExecutionTaskApply[];
5
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void; // callback may be sync or async
6
+ };
7
+ /**
8
+ * Thin wrapper around {@link TaskRunner} that represents a single linear execution run.
9
+ */
10
+ export declare class ExecutionSession {
11
+ private readonly runner; // presumably the TaskRunner returned by getRunner() — TODO confirm against the implementation
12
+ constructor(name: string, contextProvider: () => Promise<UIContext>, options?: ExecutionSessionOptions); // `contextProvider` is an async factory for a UIContext
13
+ append(tasks: ExecutionTaskApply[] | ExecutionTaskApply, options?: { // accepts one task or an array of tasks
14
+ allowWhenError?: boolean;
15
+ }): Promise<void>;
16
+ appendAndRun(tasks: ExecutionTaskApply[] | ExecutionTaskApply, options?: { // same inputs as append; resolves to a result object or undefined
17
+ allowWhenError?: boolean;
18
+ }): Promise<{
19
+ output: any;
20
+ thought?: string;
21
+ } | undefined>;
22
+ run(options?: { // resolves to the same result shape as appendAndRun
23
+ allowWhenError?: boolean;
24
+ }): Promise<{
25
+ output: any;
26
+ thought?: string;
27
+ } | undefined>;
28
+ isInErrorState(): boolean;
29
+ latestErrorTask(): import("../types").ExecutionTask | null; // null is a possible result
30
+ appendErrorPlan(errorMsg: string): Promise<{ // resolved `output` is typed as undefined
31
+ output: undefined;
32
+ runner: TaskRunner;
33
+ }>;
34
+ getRunner(): TaskRunner;
35
+ }
36
+ export {};
@@ -0,0 +1,10 @@
1
+ export { Agent, createAgent } from './agent';
2
+ export { commonContextParser } from './utils';
3
+ export { getReportFileName, printReportMsg, } from './utils';
4
+ export { extractInsightParam, locateParamStr, paramStr, taskTitleStr, typeStr, } from './ui-utils';
5
+ export { type LocateCache, type PlanningCache, TaskCache } from './task-cache';
6
+ export { cacheFileExt } from './task-cache';
7
+ export { TaskExecutor } from './tasks';
8
+ export { getCurrentExecutionFile } from './utils';
9
+ export type { AgentOpt } from '../types';
10
+ export type { AiActOptions } from './agent';
@@ -0,0 +1,34 @@
1
+ import type { AbstractInterface } from '../device';
2
+ import type Service from '../service';
3
+ import type { DetailedLocateParam, DeviceAction, ExecutionTaskApply, PlanningAction, PlanningLocateParam } from '../types';
4
+ import type { IModelConfig } from '@midscene/shared/env';
5
+ import type { TaskCache } from './task-cache';
6
+ export declare function locatePlanForLocate(param: string | DetailedLocateParam): PlanningAction<PlanningLocateParam>; // accepts a plain string or a DetailedLocateParam and returns a planning action typed for locate params
7
+ interface TaskBuilderDeps { // module-private shape of the single object argument taken by the TaskBuilder constructor
8
+ interfaceInstance: AbstractInterface;
9
+ service: Service;
10
+ taskCache?: TaskCache;
11
+ actionSpace: DeviceAction[];
12
+ waitAfterAction?: number; // NOTE(review): unit is not stated in this declaration — confirm
13
+ }
14
+ interface BuildOptions { // module-private options for TaskBuilder.build; every field is optional
15
+ cacheable?: boolean;
16
+ deepLocate?: boolean;
17
+ abortSignal?: AbortSignal;
18
+ }
19
+ export declare class TaskBuilder { // the five private readonly fields below share names with the constructor's destructured inputs, except `interface` (input is `interfaceInstance`)
20
+ private readonly interface;
21
+ private readonly service;
22
+ private readonly taskCache?;
23
+ private readonly actionSpace;
24
+ private readonly waitAfterAction?;
25
+ constructor({ interfaceInstance, service, taskCache, actionSpace, waitAfterAction, }: TaskBuilderDeps); // single destructured object argument
26
+ build(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: BuildOptions): Promise<{ // two model configs are passed positionally; resolves to an object holding the task list
27
+ tasks: ExecutionTaskApply[];
28
+ }>;
29
+ private handleFinishedPlan;
30
+ private handleLocatePlan;
31
+ private handleActionPlan;
32
+ private createLocateTask;
33
+ }
34
+ export {};
@@ -0,0 +1,49 @@
1
+ import type { TUserPrompt } from '../ai-model';
2
+ import type { ElementCacheFeature } from '../types';
3
+ export declare const debug: import("@midscene/shared/logger").DebugFunction; // exported constant typed via an inline import of DebugFunction
4
+ export interface PlanningCache { // cache record variant tagged with type 'plan'
5
+ type: 'plan'; // literal tag that distinguishes this record from LocateCache
6
+ prompt: string;
7
+ yamlWorkflow: string;
8
+ }
9
+ export interface LocateCache { // cache record variant tagged with type 'locate'
10
+ type: 'locate'; // literal tag that distinguishes this record from PlanningCache
11
+ prompt: TUserPrompt; // typed TUserPrompt here, whereas PlanningCache.prompt is a plain string
12
+ cache?: ElementCacheFeature;
13
+ /** @deprecated kept for backward compatibility */
14
+ xpaths?: string[];
15
+ }
16
+ export interface MatchCacheResult<T extends PlanningCache | LocateCache> { // generic over which of the two cache record types was matched
17
+ cacheContent: T;
18
+ cacheUsable: boolean;
19
+ updateFn: (cb: (cache: T) => void) => void; // takes a callback that receives the cache record; neither function returns a value
20
+ }
21
+ export type CacheFileContent = { // also the type of TaskCache.cache and the return type of TaskCache.loadCacheFromFile
22
+ midsceneVersion: string;
23
+ cacheId: string;
24
+ caches: Array<PlanningCache | LocateCache>; // plan and locate records share one array
25
+ };
26
+ export declare const cacheFileExt = ".cache.yaml"; // the constant's type is this exact string literal
27
+ export declare class TaskCache {
28
+ cacheId: string;
29
+ cacheFilePath?: string;
30
+ cache: CacheFileContent;
31
+ isCacheResultUsed: boolean;
32
+ cacheOriginalLength: number;
33
+ readOnlyMode: boolean;
34
+ writeOnlyMode: boolean;
35
+ private matchedCacheIndices;
36
+ constructor(cacheId: string, isCacheResultUsed: boolean, cacheFilePath?: string, options?: { // first two arguments are required; option names `readOnly`/`writeOnly` differ from the `readOnlyMode`/`writeOnlyMode` fields
37
+ readOnly?: boolean;
38
+ writeOnly?: boolean;
39
+ });
40
+ matchCache(prompt: TUserPrompt, type: 'plan' | 'locate'): MatchCacheResult<PlanningCache | LocateCache> | undefined; // undefined is a possible result
41
+ matchPlanCache(prompt: string): MatchCacheResult<PlanningCache> | undefined; // plan-typed variant; prompt is a plain string
42
+ matchLocateCache(prompt: TUserPrompt): MatchCacheResult<LocateCache> | undefined; // locate-typed variant; prompt is a TUserPrompt
43
+ appendCache(cache: PlanningCache | LocateCache): void;
44
+ loadCacheFromFile(): CacheFileContent | undefined; // synchronous signature; undefined is a possible result
45
+ flushCacheToFile(options?: { // synchronous signature (returns void, not a Promise)
46
+ cleanUnused?: boolean;
47
+ }): void;
48
+ updateOrAppendCacheRecord(newRecord: PlanningCache | LocateCache, cachedRecord?: MatchCacheResult<PlanningCache | LocateCache>): void; // `cachedRecord` is optional and has the type returned by matchCache
49
+ }
@@ -0,0 +1,69 @@
1
+ import { type TMultimodalPrompt, type TUserPrompt } from '../common';
2
+ import type { AbstractInterface } from '../device';
3
+ import type Service from '../service';
4
+ import type { TaskRunner } from '../task-runner';
5
+ import { TaskExecutionError } from '../task-runner';
6
+ import type { DeepThinkOption, DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
7
+ import { type IModelConfig } from '@midscene/shared/env';
8
+ import type { TaskCache } from './task-cache';
9
+ export { locatePlanForLocate } from './task-builder';
10
+ interface ExecutionResult<OutputType = any> { // module-private result wrapper; output type defaults to any
11
+ output: OutputType;
12
+ thought?: string;
13
+ runner: TaskRunner; // always present, unlike `thought`
14
+ }
15
+ interface TaskExecutorHooks { // module-private; type of the `hooks` option in the TaskExecutor constructor
16
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void; // callback may be sync or async; `error` is optional
17
+ }
18
+ export { TaskExecutionError };
19
+ export declare class TaskExecutor {
20
+ interface: AbstractInterface;
21
+ service: Service;
22
+ taskCache?: TaskCache;
23
+ private readonly providedActionSpace;
24
+ private readonly taskBuilder;
25
+ onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart']; // type is looked up from ExecutionTaskProgressOptions rather than redeclared
26
+ private readonly hooks?;
27
+ replanningCycleLimit?: number;
28
+ waitAfterAction?: number;
29
+ useDeviceTimestamp?: boolean;
30
+ get page(): AbstractInterface; // read-only accessor typed identically to the `interface` field
31
+ constructor(interfaceInstance: AbstractInterface, service: Service, opts: { // `opts` is required; within it only `actionSpace` is mandatory
32
+ taskCache?: TaskCache;
33
+ onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
34
+ replanningCycleLimit?: number;
35
+ waitAfterAction?: number;
36
+ useDeviceTimestamp?: boolean;
37
+ hooks?: TaskExecutorHooks;
38
+ actionSpace: DeviceAction[];
39
+ });
40
+ private createExecutionSession;
41
+ private getActionSpace;
42
+ /**
43
+ * Get a readable time string using device time when configured.
44
+ * This method respects the useDeviceTimestamp configuration.
45
+ * @param format - Optional format string
46
+ * @returns A formatted time string
47
+ */
48
+ private getTimeString;
49
+ convertPlanToExecutable(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: { // parameter list and result shape match TaskBuilder.build in ./task-builder
50
+ cacheable?: boolean;
51
+ deepLocate?: boolean;
52
+ abortSignal?: AbortSignal;
53
+ }): Promise<{
54
+ tasks: ExecutionTaskApply[];
55
+ }>;
56
+ loadYamlFlowAsPlanning(userInstruction: string, yamlString: string): Promise<{
57
+ runner: TaskRunner;
58
+ }>;
59
+ runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
60
+ action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[], deepLocate?: boolean, abortSignal?: AbortSignal): Promise<ExecutionResult<{ // twelve positional parameters (first four required), so argument order matters at call sites
61
+ yamlFlow?: MidsceneYamlFlowItem[];
62
+ output?: string;
63
+ } | undefined>>;
64
+ private runAction;
65
+ private createTypeQueryTask;
66
+ createTypeQueryExecution<T>(type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert', demand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<ExecutionResult<T>>; // `type` is restricted to five literal names; T types the resolved output
67
+ waitFor(assertion: TUserPrompt, opt: PlanningActionParamWaitFor, modelConfig: IModelConfig): Promise<ExecutionResult<void>>; // all three arguments are required
68
+ }
69
+ export declare function withFileChooser<T>(interfaceInstance: AbstractInterface, fileChooserAccept: string[] | undefined, action: () => Promise<T>): Promise<T>; // takes an async `action` and resolves to that action's result type T; `fileChooserAccept` must be passed, even if undefined
@@ -0,0 +1,14 @@
1
+ import type { DetailedLocateParam, ExecutionTask, PullParam, ScrollParam } from '../types';
2
+ export declare function typeStr(task: ExecutionTask): any; // return type is `any`, unlike the other *Str helpers in this file which return string
3
+ export declare function locateParamStr(locate?: DetailedLocateParam | string): string; // argument may be omitted; a string is returned either way
4
+ export declare function scrollParamStr(scrollParam?: ScrollParam): string; // argument may be omitted; a string is returned either way
5
+ export declare function pullParamStr(pullParam?: PullParam): string; // argument may be omitted; a string is returned either way
6
+ export declare function extractInsightParam(taskParam: any): { // input is untyped (`any`); the result always has `content` and may have `images`
7
+ content: string;
8
+ images?: Array<{
9
+ name: string;
10
+ url: string;
11
+ }>;
12
+ };
13
+ export declare function taskTitleStr(type: 'Tap' | 'Hover' | 'Input' | 'RightClick' | 'KeyboardPress' | 'Scroll' | 'Act' | 'Query' | 'Assert' | 'WaitFor' | 'Locate' | 'Boolean' | 'Number' | 'String', prompt: string): string; // `type` is restricted to these fourteen literal names
14
+ export declare function paramStr(task: ExecutionTask): string; // takes the same ExecutionTask input as typeStr but is typed to return string