@aiscene/core 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +753 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/execution-session.mjs +41 -0
  7. package/dist/es/agent/execution-session.mjs.map +1 -0
  8. package/dist/es/agent/index.mjs +6 -0
  9. package/dist/es/agent/task-builder.mjs +332 -0
  10. package/dist/es/agent/task-builder.mjs.map +1 -0
  11. package/dist/es/agent/task-cache.mjs +214 -0
  12. package/dist/es/agent/task-cache.mjs.map +1 -0
  13. package/dist/es/agent/tasks.mjs +423 -0
  14. package/dist/es/agent/tasks.mjs.map +1 -0
  15. package/dist/es/agent/ui-utils.mjs +91 -0
  16. package/dist/es/agent/ui-utils.mjs.map +1 -0
  17. package/dist/es/agent/utils.mjs +169 -0
  18. package/dist/es/agent/utils.mjs.map +1 -0
  19. package/dist/es/ai-model/auto-glm/actions.mjs +239 -0
  20. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  21. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  22. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  23. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  24. package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
  25. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  26. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  27. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  28. package/dist/es/ai-model/auto-glm/util.mjs +9 -0
  29. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  30. package/dist/es/ai-model/connectivity.mjs +138 -0
  31. package/dist/es/ai-model/connectivity.mjs.map +1 -0
  32. package/dist/es/ai-model/conversation-history.mjs +195 -0
  33. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  34. package/dist/es/ai-model/index.mjs +12 -0
  35. package/dist/es/ai-model/inspect.mjs +397 -0
  36. package/dist/es/ai-model/inspect.mjs.map +1 -0
  37. package/dist/es/ai-model/llm-planning.mjs +233 -0
  38. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  39. package/dist/es/ai-model/prompt/common.mjs +7 -0
  40. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  41. package/dist/es/ai-model/prompt/describe.mjs +66 -0
  42. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  43. package/dist/es/ai-model/prompt/extraction.mjs +131 -0
  44. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  45. package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
  46. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  47. package/dist/es/ai-model/prompt/llm-planning.mjs +568 -0
  48. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  49. package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
  50. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  52. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  53. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  54. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  55. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  56. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  57. package/dist/es/ai-model/prompt/util.mjs +59 -0
  58. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  59. package/dist/es/ai-model/prompt/yaml-generator.mjs +203 -0
  60. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  61. package/dist/es/ai-model/service-caller/codex-app-server.mjs +575 -0
  62. package/dist/es/ai-model/service-caller/codex-app-server.mjs.map +1 -0
  63. package/dist/es/ai-model/service-caller/image-detail.mjs +6 -0
  64. package/dist/es/ai-model/service-caller/image-detail.mjs.map +1 -0
  65. package/dist/es/ai-model/service-caller/index.mjs +475 -0
  66. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  67. package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
  68. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  69. package/dist/es/common.mjs +371 -0
  70. package/dist/es/common.mjs.map +1 -0
  71. package/dist/es/device/device-options.mjs +0 -0
  72. package/dist/es/device/index.mjs +341 -0
  73. package/dist/es/device/index.mjs.map +1 -0
  74. package/dist/es/dump/html-utils.mjs +292 -0
  75. package/dist/es/dump/html-utils.mjs.map +1 -0
  76. package/dist/es/dump/index.mjs +3 -0
  77. package/dist/es/dump/screenshot-restoration.mjs +32 -0
  78. package/dist/es/dump/screenshot-restoration.mjs.map +1 -0
  79. package/dist/es/dump/screenshot-store.mjs +126 -0
  80. package/dist/es/dump/screenshot-store.mjs.map +1 -0
  81. package/dist/es/index.mjs +19 -0
  82. package/dist/es/index.mjs.map +1 -0
  83. package/dist/es/report-cli.mjs +151 -0
  84. package/dist/es/report-cli.mjs.map +1 -0
  85. package/dist/es/report-generator.mjs +205 -0
  86. package/dist/es/report-generator.mjs.map +1 -0
  87. package/dist/es/report-markdown.mjs +218 -0
  88. package/dist/es/report-markdown.mjs.map +1 -0
  89. package/dist/es/report.mjs +270 -0
  90. package/dist/es/report.mjs.map +1 -0
  91. package/dist/es/screenshot-item.mjs +122 -0
  92. package/dist/es/screenshot-item.mjs.map +1 -0
  93. package/dist/es/service/index.mjs +274 -0
  94. package/dist/es/service/index.mjs.map +1 -0
  95. package/dist/es/service/utils.mjs +15 -0
  96. package/dist/es/service/utils.mjs.map +1 -0
  97. package/dist/es/skill/index.mjs +38 -0
  98. package/dist/es/skill/index.mjs.map +1 -0
  99. package/dist/es/task-runner.mjs +263 -0
  100. package/dist/es/task-runner.mjs.map +1 -0
  101. package/dist/es/task-timing.mjs +12 -0
  102. package/dist/es/task-timing.mjs.map +1 -0
  103. package/dist/es/tree.mjs +13 -0
  104. package/dist/es/tree.mjs.map +1 -0
  105. package/dist/es/types.mjs +204 -0
  106. package/dist/es/types.mjs.map +1 -0
  107. package/dist/es/utils.mjs +234 -0
  108. package/dist/es/utils.mjs.map +1 -0
  109. package/dist/es/yaml/builder.mjs +13 -0
  110. package/dist/es/yaml/builder.mjs.map +1 -0
  111. package/dist/es/yaml/index.mjs +4 -0
  112. package/dist/es/yaml/player.mjs +442 -0
  113. package/dist/es/yaml/player.mjs.map +1 -0
  114. package/dist/es/yaml/utils.mjs +102 -0
  115. package/dist/es/yaml/utils.mjs.map +1 -0
  116. package/dist/es/yaml.mjs +0 -0
  117. package/dist/lib/agent/agent.js +801 -0
  118. package/dist/lib/agent/agent.js.map +1 -0
  119. package/dist/lib/agent/common.js +5 -0
  120. package/dist/lib/agent/execution-session.js +75 -0
  121. package/dist/lib/agent/execution-session.js.map +1 -0
  122. package/dist/lib/agent/index.js +78 -0
  123. package/dist/lib/agent/index.js.map +1 -0
  124. package/dist/lib/agent/task-builder.js +369 -0
  125. package/dist/lib/agent/task-builder.js.map +1 -0
  126. package/dist/lib/agent/task-cache.js +266 -0
  127. package/dist/lib/agent/task-cache.js.map +1 -0
  128. package/dist/lib/agent/tasks.js +466 -0
  129. package/dist/lib/agent/tasks.js.map +1 -0
  130. package/dist/lib/agent/ui-utils.js +143 -0
  131. package/dist/lib/agent/ui-utils.js.map +1 -0
  132. package/dist/lib/agent/utils.js +240 -0
  133. package/dist/lib/agent/utils.js.map +1 -0
  134. package/dist/lib/ai-model/auto-glm/actions.js +273 -0
  135. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  136. package/dist/lib/ai-model/auto-glm/index.js +66 -0
  137. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  138. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  139. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  140. package/dist/lib/ai-model/auto-glm/planning.js +105 -0
  141. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  142. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  143. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  144. package/dist/lib/ai-model/auto-glm/util.js +46 -0
  145. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  146. package/dist/lib/ai-model/connectivity.js +182 -0
  147. package/dist/lib/ai-model/connectivity.js.map +1 -0
  148. package/dist/lib/ai-model/conversation-history.js +229 -0
  149. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  150. package/dist/lib/ai-model/index.js +129 -0
  151. package/dist/lib/ai-model/index.js.map +1 -0
  152. package/dist/lib/ai-model/inspect.js +443 -0
  153. package/dist/lib/ai-model/inspect.js.map +1 -0
  154. package/dist/lib/ai-model/llm-planning.js +270 -0
  155. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/common.js +41 -0
  157. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  158. package/dist/lib/ai-model/prompt/describe.js +100 -0
  159. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  160. package/dist/lib/ai-model/prompt/extraction.js +171 -0
  161. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  162. package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
  163. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  164. package/dist/lib/ai-model/prompt/llm-planning.js +605 -0
  165. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  166. package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
  167. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  168. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  169. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  170. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  171. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  172. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  173. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  174. package/dist/lib/ai-model/prompt/util.js +105 -0
  175. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  176. package/dist/lib/ai-model/prompt/yaml-generator.js +264 -0
  177. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  178. package/dist/lib/ai-model/service-caller/codex-app-server.js +624 -0
  179. package/dist/lib/ai-model/service-caller/codex-app-server.js.map +1 -0
  180. package/dist/lib/ai-model/service-caller/image-detail.js +40 -0
  181. package/dist/lib/ai-model/service-caller/image-detail.js.map +1 -0
  182. package/dist/lib/ai-model/service-caller/index.js +540 -0
  183. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  184. package/dist/lib/ai-model/ui-tars-planning.js +283 -0
  185. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  186. package/dist/lib/common.js +480 -0
  187. package/dist/lib/common.js.map +1 -0
  188. package/dist/lib/device/device-options.js +20 -0
  189. package/dist/lib/device/device-options.js.map +1 -0
  190. package/dist/lib/device/index.js +468 -0
  191. package/dist/lib/device/index.js.map +1 -0
  192. package/dist/lib/dump/html-utils.js +368 -0
  193. package/dist/lib/dump/html-utils.js.map +1 -0
  194. package/dist/lib/dump/index.js +60 -0
  195. package/dist/lib/dump/index.js.map +1 -0
  196. package/dist/lib/dump/screenshot-restoration.js +66 -0
  197. package/dist/lib/dump/screenshot-restoration.js.map +1 -0
  198. package/dist/lib/dump/screenshot-store.js +166 -0
  199. package/dist/lib/dump/screenshot-store.js.map +1 -0
  200. package/dist/lib/index.js +186 -0
  201. package/dist/lib/index.js.map +1 -0
  202. package/dist/lib/report-cli.js +191 -0
  203. package/dist/lib/report-cli.js.map +1 -0
  204. package/dist/lib/report-generator.js +246 -0
  205. package/dist/lib/report-generator.js.map +1 -0
  206. package/dist/lib/report-markdown.js +255 -0
  207. package/dist/lib/report-markdown.js.map +1 -0
  208. package/dist/lib/report.js +316 -0
  209. package/dist/lib/report.js.map +1 -0
  210. package/dist/lib/screenshot-item.js +156 -0
  211. package/dist/lib/screenshot-item.js.map +1 -0
  212. package/dist/lib/service/index.js +308 -0
  213. package/dist/lib/service/index.js.map +1 -0
  214. package/dist/lib/service/utils.js +49 -0
  215. package/dist/lib/service/utils.js.map +1 -0
  216. package/dist/lib/skill/index.js +72 -0
  217. package/dist/lib/skill/index.js.map +1 -0
  218. package/dist/lib/task-runner.js +300 -0
  219. package/dist/lib/task-runner.js.map +1 -0
  220. package/dist/lib/task-timing.js +46 -0
  221. package/dist/lib/task-timing.js.map +1 -0
  222. package/dist/lib/tree.js +53 -0
  223. package/dist/lib/tree.js.map +1 -0
  224. package/dist/lib/types.js +300 -0
  225. package/dist/lib/types.js.map +1 -0
  226. package/dist/lib/utils.js +316 -0
  227. package/dist/lib/utils.js.map +1 -0
  228. package/dist/lib/yaml/builder.js +57 -0
  229. package/dist/lib/yaml/builder.js.map +1 -0
  230. package/dist/lib/yaml/index.js +81 -0
  231. package/dist/lib/yaml/index.js.map +1 -0
  232. package/dist/lib/yaml/player.js +476 -0
  233. package/dist/lib/yaml/player.js.map +1 -0
  234. package/dist/lib/yaml/utils.js +155 -0
  235. package/dist/lib/yaml/utils.js.map +1 -0
  236. package/dist/lib/yaml.js +20 -0
  237. package/dist/lib/yaml.js.map +1 -0
  238. package/dist/types/agent/agent.d.ts +216 -0
  239. package/dist/types/agent/common.d.ts +0 -0
  240. package/dist/types/agent/execution-session.d.ts +36 -0
  241. package/dist/types/agent/index.d.ts +9 -0
  242. package/dist/types/agent/task-builder.d.ts +34 -0
  243. package/dist/types/agent/task-cache.d.ts +49 -0
  244. package/dist/types/agent/tasks.d.ts +69 -0
  245. package/dist/types/agent/ui-utils.d.ts +14 -0
  246. package/dist/types/agent/utils.d.ts +25 -0
  247. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  248. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  249. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  250. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  251. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  252. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  253. package/dist/types/ai-model/connectivity.d.ts +20 -0
  254. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  255. package/dist/types/ai-model/index.d.ts +16 -0
  256. package/dist/types/ai-model/inspect.d.ts +67 -0
  257. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  258. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  259. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  260. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  261. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  262. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  263. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  264. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  265. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  266. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  267. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  268. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  269. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  270. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  271. package/dist/types/ai-model/service-caller/index.d.ts +49 -0
  272. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  273. package/dist/types/common.d.ts +288 -0
  274. package/dist/types/device/device-options.d.ts +145 -0
  275. package/dist/types/device/index.d.ts +2528 -0
  276. package/dist/types/dump/html-utils.d.ts +75 -0
  277. package/dist/types/dump/index.d.ts +5 -0
  278. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  279. package/dist/types/dump/screenshot-store.d.ts +49 -0
  280. package/dist/types/index.d.ts +21 -0
  281. package/dist/types/report-cli.d.ts +36 -0
  282. package/dist/types/report-generator.d.ts +81 -0
  283. package/dist/types/report-markdown.d.ts +24 -0
  284. package/dist/types/report.d.ts +52 -0
  285. package/dist/types/screenshot-item.d.ts +67 -0
  286. package/dist/types/service/index.d.ts +24 -0
  287. package/dist/types/service/utils.d.ts +2 -0
  288. package/dist/types/skill/index.d.ts +25 -0
  289. package/dist/types/task-runner.d.ts +50 -0
  290. package/dist/types/task-timing.d.ts +8 -0
  291. package/dist/types/tree.d.ts +4 -0
  292. package/dist/types/types.d.ts +681 -0
  293. package/dist/types/utils.d.ts +45 -0
  294. package/dist/types/yaml/builder.d.ts +2 -0
  295. package/dist/types/yaml/index.d.ts +4 -0
  296. package/dist/types/yaml/player.d.ts +34 -0
  297. package/dist/types/yaml/utils.d.ts +9 -0
  298. package/dist/types/yaml.d.ts +215 -0
  299. package/package.json +111 -0
@@ -0,0 +1,681 @@
1
+ import type { NodeType } from '@midscene/shared/constants';
2
+ import type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';
3
+ import type { BaseElement, LocateResultElement, Rect, Size } from '@midscene/shared/types';
4
+ import type { z } from 'zod';
5
+ import type { TUserPrompt } from './common';
6
+ import { ScreenshotItem } from './screenshot-item';
7
+ import type { DetailedLocateParam, MidsceneYamlFlowItem, ServiceExtractOption } from './yaml';
8
+ export type { ElementTreeNode, BaseElement, Rect, Size, Point, } from '@midscene/shared/types';
9
+ export * from './yaml';
10
+ export type AIUsageInfo = Record<string, any> & {
11
+ prompt_tokens: number | undefined;
12
+ completion_tokens: number | undefined;
13
+ total_tokens: number | undefined;
14
+ cached_input: number | undefined;
15
+ time_cost: number | undefined;
16
+ model_name: string | undefined;
17
+ model_description: string | undefined;
18
+ intent: string | undefined;
19
+ request_id: string | undefined;
20
+ };
21
+ export type { LocateResultElement };
22
+ export type AISingleElementResponseByPosition = {
23
+ position?: {
24
+ x: number;
25
+ y: number;
26
+ };
27
+ bbox?: [number, number, number, number];
28
+ reason: string;
29
+ text: string;
30
+ };
31
+ export interface AIElementCoordinatesResponse {
32
+ bbox: [number, number, number, number];
33
+ errors?: string[];
34
+ }
35
+ export type AIElementResponse = AIElementCoordinatesResponse;
36
+ export interface AIDataExtractionResponse<DataDemand> {
37
+ data: DataDemand;
38
+ errors?: string[];
39
+ thought?: string;
40
+ }
41
+ export interface AISectionLocatorResponse {
42
+ bbox: [number, number, number, number];
43
+ references_bbox?: [number, number, number, number][];
44
+ error?: string;
45
+ }
46
+ export interface AIAssertionResponse {
47
+ pass: boolean;
48
+ thought: string;
49
+ }
50
+ export interface AIDescribeElementResponse {
51
+ description: string;
52
+ error?: string;
53
+ }
54
+ export interface LocatorValidatorOption {
55
+ centerDistanceThreshold?: number;
56
+ }
57
+ export interface LocateValidatorResult {
58
+ pass: boolean;
59
+ rect: Rect;
60
+ center: [number, number];
61
+ centerDistance?: number;
62
+ }
63
+ export interface AgentDescribeElementAtPointResult {
64
+ prompt: string;
65
+ deepLocate: boolean;
66
+ verifyResult?: LocateValidatorResult;
67
+ }
68
+ /**
69
+ * UI context: a screenshot of the UI together with its size after shrinking and the ratio for mapping screenshot coordinates back to logical coordinates.
70
+ */
71
+ export declare abstract class UIContext {
72
+ /**
73
+ * Screenshot of the current UI state. Its size is shotSize, i.e. it has already been shrunk by screenshotShrinkFactor.
74
+ */
75
+ abstract screenshot: ScreenshotItem;
76
+ /**
77
+ * Size of the screenshot after shrinking.
78
+ */
79
+ abstract shotSize: Size;
80
+ /**
81
+ * The ratio for converting shrunk screenshot coordinates to logical coordinates.
82
+ *
83
+ * Example:
84
+ * - Physical screen width: 3000px, dpr=6
85
+ * - Logical width: 500px
86
+ * - User-defined screenshotShrinkFactor: 2
87
+ * - Actual shrunk screenshot width: 3000 / 2 = 1500px
88
+ * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3
89
+ * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px
90
+ */
91
+ abstract shrunkShotToLogicalRatio: number;
92
+ abstract _isFrozen?: boolean;
93
+ abstract deprecatedDpr?: number;
94
+ }
95
+ export type EnsureObject<T> = {
96
+ [K in keyof T]: any;
97
+ };
98
+ export type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';
99
+ export type ServiceExtractParam = string | Record<string, string>;
100
+ export type ElementCacheFeature = Record<string, unknown>;
101
+ export interface LocateResult {
102
+ element: LocateResultElement | null;
103
+ rect?: Rect;
104
+ }
105
+ export type ThinkingLevel = 'off' | 'medium' | 'high';
106
+ export type DeepThinkOption = 'unset' | true | false;
107
+ export interface ServiceTaskInfo {
108
+ durationMs: number;
109
+ formatResponse?: string;
110
+ rawResponse?: string;
111
+ usage?: AIUsageInfo;
112
+ searchArea?: Rect;
113
+ searchAreaRawResponse?: string;
114
+ searchAreaUsage?: AIUsageInfo;
115
+ reasoning_content?: string;
116
+ }
117
+ export interface DumpMeta {
118
+ logTime: number;
119
+ }
120
+ export type ReportAttributes = Record<string, string | number | boolean | null | undefined>;
121
+ export interface ReportDumpWithAttributes {
122
+ dumpString: string;
123
+ attributes?: ReportAttributes;
124
+ }
125
+ export interface ServiceDump extends DumpMeta {
126
+ type: 'locate' | 'extract' | 'assert';
127
+ logId: string;
128
+ userQuery: {
129
+ element?: TUserPrompt;
130
+ dataDemand?: ServiceExtractParam;
131
+ assertion?: TUserPrompt;
132
+ };
133
+ matchedElement: LocateResultElement[];
134
+ matchedRect?: Rect;
135
+ deepLocate?: boolean;
136
+ data: any;
137
+ assertionPass?: boolean;
138
+ assertionThought?: string;
139
+ taskInfo: ServiceTaskInfo;
140
+ error?: string;
141
+ output?: any;
142
+ }
143
+ export type PartialServiceDumpFromSDK = Omit<ServiceDump, 'logTime' | 'logId' | 'model_name'>;
144
+ export interface ServiceResultBase {
145
+ dump: ServiceDump;
146
+ }
147
+ export type LocateResultWithDump = LocateResult & ServiceResultBase;
148
+ export interface ServiceExtractResult<T> extends ServiceResultBase {
149
+ data: T;
150
+ thought?: string;
151
+ usage?: AIUsageInfo;
152
+ reasoning_content?: string;
153
+ }
154
+ export declare class ServiceError extends Error {
155
+ dump: ServiceDump;
156
+ constructor(message: string, dump: ServiceDump);
157
+ }
158
+ export interface LiteUISection {
159
+ name: string;
160
+ description: string;
161
+ sectionCharacteristics: string;
162
+ textIds: string[];
163
+ }
164
+ export type ElementById = (id: string) => BaseElement | null;
165
+ export type ServiceAssertionResponse = AIAssertionResponse & {
166
+ usage?: AIUsageInfo;
167
+ };
168
+ /**
169
+ * agent
170
+ */
171
+ export type OnTaskStartTip = (tip: string) => Promise<void> | void;
172
+ export interface AgentWaitForOpt extends ServiceExtractOption {
173
+ checkIntervalMs?: number;
174
+ timeoutMs?: number;
175
+ }
176
+ export interface AgentAssertOpt {
177
+ keepRawResponse?: boolean;
178
+ }
179
+ /**
180
+ * planning
181
+ *
182
+ */
183
+ export interface PlanningLocateParam extends DetailedLocateParam {
184
+ bbox?: [number, number, number, number];
185
+ }
186
+ export interface PlanningAction<ParamType = any> {
187
+ thought?: string;
188
+ log?: string;
189
+ type: string;
190
+ param: ParamType;
191
+ }
192
+ export type SubGoalStatus = 'pending' | 'running' | 'finished';
193
+ export interface SubGoal {
194
+ index: number;
195
+ status: SubGoalStatus;
196
+ description: string;
197
+ logs?: string[];
198
+ }
199
+ export interface RawResponsePlanningAIResponse {
200
+ action: PlanningAction;
201
+ thought?: string;
202
+ log: string;
203
+ memory?: string;
204
+ error?: string;
205
+ finalizeMessage?: string;
206
+ finalizeSuccess?: boolean;
207
+ updateSubGoals?: SubGoal[];
208
+ markFinishedIndexes?: number[];
209
+ }
210
+ export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> {
211
+ actions?: PlanningAction[];
212
+ usage?: AIUsageInfo;
213
+ rawResponse?: string;
214
+ yamlFlow?: MidsceneYamlFlowItem[];
215
+ yamlString?: string;
216
+ error?: string;
217
+ reasoning_content?: string;
218
+ shouldContinuePlanning: boolean;
219
+ output?: string;
220
+ }
221
+ export interface PlanningActionParamSleep {
222
+ timeMs: number;
223
+ }
224
+ export interface PlanningActionParamError {
225
+ thought: string;
226
+ }
227
+ export type PlanningActionParamWaitFor = AgentWaitForOpt & {};
228
+ export interface LongPressParam {
229
+ duration?: number;
230
+ }
231
+ export interface PullParam {
232
+ direction: 'up' | 'down';
233
+ distance?: number;
234
+ duration?: number;
235
+ }
236
+ /**
237
+ * misc
238
+ */
239
+ export interface Color {
240
+ name: string;
241
+ hex: string;
242
+ }
243
+ export interface BaseAgentParserOpt {
244
+ selector?: string;
245
+ }
246
+ export interface PuppeteerParserOpt extends BaseAgentParserOpt {
247
+ }
248
+ export interface PlaywrightParserOpt extends BaseAgentParserOpt {
249
+ }
250
+ export interface ExecutionTaskProgressOptions {
251
+ onTaskStart?: (task: ExecutionTask) => Promise<void> | void;
252
+ }
253
+ export interface ExecutionRecorderItem {
254
+ type: 'screenshot';
255
+ ts: number;
256
+ screenshot?: ScreenshotItem;
257
+ timing?: string;
258
+ }
259
+ export type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';
260
+ export interface ExecutorContext {
261
+ task: ExecutionTask;
262
+ element?: LocateResultElement | null;
263
+ uiContext?: UIContext;
264
+ }
265
+ export interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> {
266
+ type: Type;
267
+ subType?: string;
268
+ param?: TaskParam;
269
+ thought?: string;
270
+ uiContext?: UIContext;
271
+ executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void> | undefined | void;
272
+ }
273
+ export interface ExecutionTaskHitBy {
274
+ from: string;
275
+ context: Record<string, any>;
276
+ }
277
+ export interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
278
+ output?: TaskOutput;
279
+ log?: TaskLog;
280
+ recorder?: ExecutionRecorderItem[];
281
+ hitBy?: ExecutionTaskHitBy;
282
+ }
283
+ export type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
284
+ taskId: string;
285
+ status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
286
+ error?: Error;
287
+ errorMessage?: string;
288
+ errorStack?: string;
289
+ timing?: {
290
+ start: number;
291
+ getUiContextStart?: number;
292
+ getUiContextEnd?: number;
293
+ callAiStart?: number;
294
+ callAiEnd?: number;
295
+ beforeInvokeActionHookStart?: number;
296
+ beforeInvokeActionHookEnd?: number;
297
+ callActionStart?: number;
298
+ callActionEnd?: number;
299
+ afterInvokeActionHookStart?: number;
300
+ afterInvokeActionHookEnd?: number;
301
+ captureAfterCallingSnapshotStart?: number;
302
+ captureAfterCallingSnapshotEnd?: number;
303
+ end?: number;
304
+ cost?: number;
305
+ };
306
+ usage?: AIUsageInfo;
307
+ searchAreaUsage?: AIUsageInfo;
308
+ reasoning_content?: string;
309
+ };
310
+ export interface IExecutionDump extends DumpMeta {
311
+ /** Stable unique identifier for this execution run */
312
+ id?: string;
313
+ name: string;
314
+ description?: string;
315
+ tasks: ExecutionTask[];
316
+ aiActContext?: string;
317
+ }
318
+ /**
319
+ * ExecutionDump class for serializing and deserializing execution dumps
320
+ */
321
+ export declare class ExecutionDump implements IExecutionDump {
322
+ id?: string;
323
+ logTime: number;
324
+ name: string;
325
+ description?: string;
326
+ tasks: ExecutionTask[];
327
+ aiActContext?: string;
328
+ constructor(data: IExecutionDump);
329
+ /**
330
+ * Serialize the ExecutionDump to a JSON string
331
+ */
332
+ serialize(indents?: number): string;
333
+ /**
334
+ * Convert to a plain object for JSON serialization
335
+ */
336
+ toJSON(): IExecutionDump;
337
+ /**
338
+ * Create an ExecutionDump instance from a serialized JSON string
339
+ */
340
+ static fromSerializedString(serialized: string): ExecutionDump;
341
+ /**
342
+ * Create an ExecutionDump instance from a plain object
343
+ */
344
+ static fromJSON(data: IExecutionDump): ExecutionDump;
345
+ /**
346
+ * Collect all ScreenshotItem instances from tasks.
347
+ * Scans through uiContext and recorder items to find screenshots.
348
+ *
349
+ * @returns Array of ScreenshotItem instances
350
+ */
351
+ collectScreenshots(): ScreenshotItem[];
352
+ }
353
+ export type ExecutionTaskInsightLocateParam = PlanningLocateParam;
354
+ export interface ExecutionTaskInsightLocateOutput {
355
+ element: LocateResultElement | null;
356
+ }
357
+ export type ExecutionTaskInsightDump = ServiceDump;
358
+ export type ExecutionTaskInsightLocateApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightLocateParam, ExecutionTaskInsightLocateOutput, ExecutionTaskInsightDump>;
359
+ export type ExecutionTaskInsightLocate = ExecutionTask<ExecutionTaskInsightLocateApply>;
360
+ export interface ExecutionTaskInsightQueryParam {
361
+ dataDemand: ServiceExtractParam;
362
+ domIncluded?: boolean | 'visible-only';
363
+ }
364
+ export interface ExecutionTaskInsightQueryOutput {
365
+ data: any;
366
+ }
367
+ export type ExecutionTaskInsightQueryApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightQueryParam, any, ExecutionTaskInsightDump>;
368
+ export type ExecutionTaskInsightQuery = ExecutionTask<ExecutionTaskInsightQueryApply>;
369
+ export interface ExecutionTaskInsightAssertionParam {
370
+ assertion: string;
371
+ }
372
+ export type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightAssertionParam, ServiceAssertionResponse, ExecutionTaskInsightDump>;
373
+ export type ExecutionTaskInsightAssertion = ExecutionTask<ExecutionTaskInsightAssertionApply>;
374
+ export type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<'Action Space', ActionParam, void, void>;
375
+ export type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;
376
+ export type ExecutionTaskLogApply<LogParam = {
377
+ content: string;
378
+ }> = ExecutionTaskApply<'Log', LogParam, void, void>;
379
+ export type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;
380
+ export type ExecutionTaskPlanningApply = ExecutionTaskApply<'Planning', {
381
+ userInstruction: string;
382
+ aiActContext?: string;
383
+ }, PlanningAIResponse>;
384
+ export type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;
385
+ export type ExecutionTaskPlanningLocateParam = PlanningLocateParam;
386
+ export interface ExecutionTaskPlanningLocateOutput {
387
+ element: LocateResultElement | null;
388
+ }
389
+ export type ExecutionTaskPlanningDump = ServiceDump;
390
+ export type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<'Planning', ExecutionTaskPlanningLocateParam, ExecutionTaskPlanningLocateOutput, ExecutionTaskPlanningDump>;
391
+ export type ExecutionTaskPlanningLocate = ExecutionTask<ExecutionTaskPlanningLocateApply>;
392
+ export interface ReportMeta {
393
+ groupName: string;
394
+ groupDescription?: string;
395
+ sdkVersion: string;
396
+ modelBriefs: ModelBrief[];
397
+ deviceType?: string;
398
+ }
399
+ export type GroupMeta = ReportMeta;
400
+ export interface IReportActionDump {
401
+ sdkVersion: string;
402
+ groupName: string;
403
+ groupDescription?: string;
404
+ modelBriefs: ModelBrief[];
405
+ executions: IExecutionDump[];
406
+ deviceType?: string;
407
+ }
408
+ export type IGroupedActionDump = IReportActionDump;
409
+ /** Short description of a model used by a report; every field is optional. */ export interface ModelBrief {
410
+ /**
411
+ * The intent/category of the model call, for example "planning" or "insight".
412
+ */
413
+ intent?: string;
414
+ /**
415
+ * The model name returned by usage metadata, for example "gpt-4o".
416
+ */
417
+ name?: string;
418
+ /**
419
+ * Optional human-readable model description, for example "qwen2.5-vl mode".
420
+ */
421
+ modelDescription?: string;
422
+ }
423
+ /**
424
+ * Class form of IReportActionDump with helpers to serialize/deserialize a report action dump as a JSON string or as a dump file plus separate screenshot files.
425
+ */
426
+ export declare class ReportActionDump implements IReportActionDump {
427
+ sdkVersion: string;
428
+ groupName: string;
429
+ groupDescription?: string;
430
+ modelBriefs: ModelBrief[];
431
+ executions: ExecutionDump[]; // ExecutionDump here vs. IExecutionDump[] in IReportActionDump; `implements` requires ExecutionDump to be assignable to IExecutionDump
432
+ deviceType?: string;
433
+ constructor(data: IReportActionDump); // built from the plain-object shape
434
+ /**
435
+ * Serialize the ReportActionDump to a JSON string
436
+ * Uses compact { $screenshot: id } format
437
+ */
438
+ serialize(indents?: number): string; // `indents` is presumably the JSON indentation width; confirm against the implementation
439
+ /**
440
+ * Serialize the ReportActionDump with inline screenshots to a JSON string.
441
+ * Each ScreenshotItem is replaced with { base64: "...", capturedAt }.
442
+ */
443
+ serializeWithInlineScreenshots(indents?: number): string;
444
+ /**
445
+ * Convert to a plain object for JSON serialization
446
+ */
447
+ toJSON(): IReportActionDump;
448
+ /**
449
+ * Create a ReportActionDump instance from a serialized JSON string
450
+ */
451
+ static fromSerializedString(serialized: string): ReportActionDump;
452
+ /**
453
+ * Create a ReportActionDump instance from a plain object
454
+ */
455
+ static fromJSON(data: IReportActionDump): ReportActionDump;
456
+ /**
457
+ * Collect all ScreenshotItem instances from all executions.
458
+ *
459
+ * @returns Array of all ScreenshotItem instances across all executions
460
+ */
461
+ collectAllScreenshots(): ScreenshotItem[];
462
+ /**
463
+ * Serialize the dump to files with screenshots as separate PNG files.
464
+ * Creates:
465
+ * - {basePath} - dump JSON with { $screenshot: id } references
466
+ * - {basePath}.screenshots/ - PNG files
467
+ *
468
+ * @param basePath - Base path for the dump file
469
+ */
470
+ serializeToFiles(basePath: string): void; // declared as returning void, not a Promise
471
+ /**
472
+ * Read dump from files and return JSON string with inline screenshots.
473
+ * Reads the dump JSON and screenshot files, then inlines the base64 data.
474
+ *
475
+ * @param basePath - Base path for the dump file
476
+ * @returns JSON string with inline screenshots ({ base64: "..." } format)
477
+ */
478
+ static fromFilesAsInlineJson(basePath: string): string; // returns a JSON string, not a ReportActionDump instance
479
+ /**
480
+ * Clean up all files associated with a serialized dump.
481
+ *
482
+ * @param basePath - Base path for the dump file
483
+ */
484
+ static cleanupFiles(basePath: string): void;
485
+ /**
486
+ * Get all file paths associated with a serialized dump.
487
+ *
488
+ * @param basePath - Base path for the dump file
489
+ * @returns Array of all associated file paths
490
+ */
491
+ static getFilePaths(basePath: string): string[];
492
+ }
493
+ /** Type alias of ReportActionDump; paired with the value declaration below so the name works both as a type and as a constructor. */ export type GroupedActionDump = ReportActionDump;
494
+ /** Value alias typed as the ReportActionDump class itself, so static members and `new` are available under this name. */ export declare const GroupedActionDump: typeof ReportActionDump;
495
+ /** Name of the interface/driver in use. The listed literals are the known values; any other string is still accepted. `string & Record<never, never>` replaces a bare `string` so the union does not collapse to `string` and editors keep suggesting the known literals, while the type stays mutually assignable with `string`. */ export type InterfaceType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android' | (string & Record<never, never>);
496
+ /** Options controlling streamed code generation; every field is optional. */ export interface StreamingCodeGenerationOptions {
497
+ /** Whether to enable streaming output */
498
+ stream?: boolean;
499
+ /** Callback function to handle streaming chunks (receives a CodeGenerationChunk) */
500
+ onChunk?: StreamingCallback;
501
+ /** Callback function to handle streaming completion; receives the final code as a string */
502
+ onComplete?: (finalCode: string) => void;
503
+ /** Callback function to handle streaming errors */
504
+ onError?: (error: Error) => void;
505
+ }
506
+ /** Callback signature for a streamed chunk: takes one CodeGenerationChunk and returns nothing. */ export type StreamingCallback = (chunk: CodeGenerationChunk) => void;
507
+ /** One streamed chunk as passed to a StreamingCallback. */ export interface CodeGenerationChunk {
508
+ /** The incremental content chunk */
509
+ content: string;
510
+ /** The reasoning content (required string; snake_case name differs from the other fields) */
511
+ reasoning_content: string;
512
+ /** The accumulated content so far */
513
+ accumulated: string;
514
+ /** Whether this is the final chunk */
515
+ isComplete: boolean;
516
+ /** Token usage information if available */
517
+ usage?: AIUsageInfo;
518
+ }
519
+ /** Final result of an AI call that may have been streamed. */ export interface StreamingAIResponse {
520
+ /** The final accumulated content */
521
+ content: string;
522
+ /** Token usage information (optional) */
523
+ usage?: AIUsageInfo;
524
+ /** Whether the response was streamed */
525
+ isStreamed: boolean;
526
+ }
527
+ /** Definition of a device action: a named `call` taking a TParam and an ExecutorContext, plus optional metadata. Both type parameters default to any. */ export interface DeviceAction<TParam = any, TReturn = any> {
528
+ name: string;
529
+ description?: string;
530
+ interfaceAlias?: string;
531
+ paramSchema?: z.ZodType<TParam>; // zod schema typed to TParam
532
+ call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn; // may return synchronously or via a Promise
533
+ delayAfterRunner?: number; // unit is not stated here; presumably milliseconds; confirm
534
+ /**
535
+ * An example param object for this action.
536
+ * Locate fields with { prompt } will automatically get bbox injected when needed.
537
+ */
538
+ sample?: {
539
+ [K in keyof TParam]?: any;
540
+ };
541
+ }
542
+ /**
543
+ * Type utilities for extracting types from DeviceAction definitions
544
+ */
545
+ /**
546
+ * Extract the parameter type (first type argument) from a DeviceAction via `infer`; resolves to `never` in the non-matching branch.
547
+ */
548
+ export type ActionParam<Action extends DeviceAction<any, any>> = Action extends DeviceAction<infer P, any> ? P : never;
549
+ /**
550
+ * Extract the return type (second type argument) from a DeviceAction via `infer`; resolves to `never` in the non-matching branch.
551
+ */
552
+ export type ActionReturn<Action extends DeviceAction<any, any>> = Action extends DeviceAction<any, infer R> ? R : never;
553
+ /**
554
+ * Web-specific types
555
+ */
556
+ /** BaseElement extended with a string id and a string-valued attribute map. */ export interface WebElementInfo extends BaseElement {
557
+ id: string;
558
+ attributes: {
559
+ nodeType: NodeType; // must be assignable to string because of the index signature below
560
+ [key: string]: string; // every attribute value, including nodeType, is string-typed
561
+ };
562
+ }
563
+ /**
564
+ * Agent
565
+ */
566
+ /** Object form of the cache setting: a required id plus an optional strategy. */ export type CacheConfig = {
567
+ strategy?: 'read-only' | 'read-write' | 'write-only'; // the default when omitted is not stated in this declaration
568
+ id: string;
569
+ };
570
+ /** Cache setting: a boolean or a CacheConfig object (presumably false/true disable/enable caching; confirm against the agent implementation). */ export type Cache = false | true | CacheConfig;
571
+ /** Agent options (presumably passed when constructing an agent; confirm); every field is optional. */ export interface AgentOpt {
571
+ testId?: string;
572
+ cacheId?: string; // NOTE(review): coexists with `cache` below; presumably the older way to name a cache; confirm
573
+ groupName?: string;
574
+ groupDescription?: string;
575
+ generateReport?: boolean;
576
+ persistExecutionDump?: boolean;
577
+ autoPrintReportMsg?: boolean;
578
+ /**
579
+ * Report output format. `'html-and-external-assets'` is a directory-based format with separate image files.
580
+ *
581
+ * When set to `'html-and-external-assets'`:
582
+ * - Screenshots are saved as PNG files in a `screenshots/` subdirectory
583
+ * - Report is generated as `index.html` with relative image paths
584
+ * - Reduces memory usage and report file size
585
+ *
586
+ * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server
587
+ * (e.g., `npx serve ./report-dir`). The file:// protocol will not
588
+ * work due to browser CORS restrictions.
589
+ *
590
+ * @default 'single-html'
591
+ */
592
+ outputFormat?: 'single-html' | 'html-and-external-assets';
593
+ onTaskStartTip?: OnTaskStartTip;
594
+ aiActContext?: string;
595
+ aiActionContext?: string; // NOTE(review): near-duplicate of aiActContext; presumably a legacy name; confirm which one wins when both are set
596
+ reportFileName?: string;
597
+ modelConfig?: TModelConfig;
598
+ cache?: Cache;
599
+ /**
600
+ * Maximum number of replanning cycles for aiAct.
601
+ * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.
602
+ * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
603
+ */
604
+ replanningCycleLimit?: number;
605
+ /**
606
+ * Wait time in milliseconds after each action execution.
607
+ * This allows the UI to settle and stabilize before the next action.
608
+ * Defaults to 300ms when not provided.
609
+ */
610
+ waitAfterAction?: number;
611
+ /**
612
+ * When set to true, Midscene will use the target device's time (Android/iOS)
613
+ * instead of the system time. Useful when the device time differs from the
614
+ * host machine. Default: false
615
+ */
616
+ useDeviceTimestamp?: boolean;
617
+ /**
618
+ * Custom screenshot shrink factor to reduce AI token usage.
619
+ * When set, the screenshot will be scaled down by this factor from the physical resolution.
620
+ *
621
+ * Example:
622
+ * - Physical screen width: 3000px, dpr=6
623
+ * - Logical width: 500px
624
+ * - screenshotShrinkFactor: 2
625
+ * - Actual shrunk screenshot width: 3000 / 2 = 1500px
626
+ * - AI analyzes the 1500px screenshot
627
+ * - Coordinates are transformed back to logical (500px) before actions execute
628
+ *
629
+ * Benefits:
630
+ * - Reduces token usage for high-resolution screenshots
631
+ * - Maintains accuracy by scaling coordinates appropriately
632
+ *
633
+ * Must be >= 1 (shrinking only, enlarging is not supported).
634
+ *
635
+ * @default 1 (no shrinking, uses original physical screenshot)
636
+ */
637
+ screenshotShrinkFactor?: number;
638
+ /**
639
+ * Custom OpenAI client factory function
640
+ *
641
+ * If provided, this function will be called to create OpenAI client instances
642
+ * for each AI call, allowing you to:
643
+ * - Wrap clients with observability tools (langsmith, langfuse)
644
+ * - Use custom OpenAI-compatible clients
645
+ * - Apply different configurations based on intent
646
+ *
647
+ * @param config - Resolved model configuration (NOTE(review): the example below takes `(openai, opts)`; confirm the parameter list against `CreateOpenAIClientFn`)
648
+ * @returns OpenAI client instance (original or wrapped)
649
+ *
650
+ * @example
651
+ * ```typescript
652
+ * createOpenAIClient: async (openai, opts) => {
653
+ * // Wrap with langsmith for planning tasks
654
+ * if (opts.baseURL?.includes('planning')) {
655
+ * return wrapOpenAI(openai, { metadata: { task: 'planning' } });
656
+ * }
657
+ *
658
+ * return openai;
659
+ * }
660
+ * ```
661
+ */
662
+ createOpenAIClient?: CreateOpenAIClientFn;
663
+ }
665
+ /** Closed set of test outcome strings used by ReportFileAttributes.testStatus. */ export type TestStatus = 'passed' | 'failed' | 'timedOut' | 'skipped' | 'interrupted';
666
+ /** Test attributes attached to a report file; all fields are required. */ export interface ReportFileAttributes {
667
+ testDuration: number; // unit is not stated here; presumably milliseconds; confirm
668
+ testStatus: TestStatus;
669
+ testTitle: string;
670
+ testId: string;
671
+ testDescription: string;
672
+ }
673
+ /** A report file path paired with its test attributes. The path is required, except that it may be omitted when testStatus is 'skipped'. */ export type ReportFileWithAttributes = {
674
+ reportFilePath: string; // general case: the path is mandatory
675
+ reportAttributes: ReportFileAttributes;
676
+ } | {
677
+ reportFilePath?: string; // skipped case: the path may be absent
678
+ reportAttributes: ReportFileAttributes & {
679
+ testStatus: 'skipped'; // narrows testStatus to the 'skipped' literal for this union member
680
+ };
681
+ };