@donggui/core 1.5.4-donggui.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +709 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/execution-session.mjs +41 -0
  7. package/dist/es/agent/execution-session.mjs.map +1 -0
  8. package/dist/es/agent/index.mjs +6 -0
  9. package/dist/es/agent/task-builder.mjs +330 -0
  10. package/dist/es/agent/task-builder.mjs.map +1 -0
  11. package/dist/es/agent/task-cache.mjs +186 -0
  12. package/dist/es/agent/task-cache.mjs.map +1 -0
  13. package/dist/es/agent/tasks.mjs +422 -0
  14. package/dist/es/agent/tasks.mjs.map +1 -0
  15. package/dist/es/agent/ui-utils.mjs +91 -0
  16. package/dist/es/agent/ui-utils.mjs.map +1 -0
  17. package/dist/es/agent/utils.mjs +198 -0
  18. package/dist/es/agent/utils.mjs.map +1 -0
  19. package/dist/es/ai-model/auto-glm/actions.mjs +224 -0
  20. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  21. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  22. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  23. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  24. package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
  25. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  26. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  27. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  28. package/dist/es/ai-model/auto-glm/util.mjs +9 -0
  29. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  30. package/dist/es/ai-model/conversation-history.mjs +195 -0
  31. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  32. package/dist/es/ai-model/index.mjs +11 -0
  33. package/dist/es/ai-model/inspect.mjs +386 -0
  34. package/dist/es/ai-model/inspect.mjs.map +1 -0
  35. package/dist/es/ai-model/llm-planning.mjs +233 -0
  36. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  37. package/dist/es/ai-model/prompt/common.mjs +7 -0
  38. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  39. package/dist/es/ai-model/prompt/describe.mjs +66 -0
  40. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  41. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  42. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  43. package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
  44. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  45. package/dist/es/ai-model/prompt/llm-planning.mjs +364 -0
  46. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  47. package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
  48. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  49. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  50. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  52. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  53. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  54. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  55. package/dist/es/ai-model/prompt/util.mjs +59 -0
  56. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  57. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  58. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  59. package/dist/es/ai-model/service-caller/index.mjs +466 -0
  60. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  61. package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
  62. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  63. package/dist/es/common.mjs +371 -0
  64. package/dist/es/common.mjs.map +1 -0
  65. package/dist/es/device/device-options.mjs +0 -0
  66. package/dist/es/device/index.mjs +300 -0
  67. package/dist/es/device/index.mjs.map +1 -0
  68. package/dist/es/dump/html-utils.mjs +211 -0
  69. package/dist/es/dump/html-utils.mjs.map +1 -0
  70. package/dist/es/dump/image-restoration.mjs +43 -0
  71. package/dist/es/dump/image-restoration.mjs.map +1 -0
  72. package/dist/es/dump/index.mjs +3 -0
  73. package/dist/es/index.mjs +15 -0
  74. package/dist/es/index.mjs.map +1 -0
  75. package/dist/es/report-generator.mjs +134 -0
  76. package/dist/es/report-generator.mjs.map +1 -0
  77. package/dist/es/report.mjs +111 -0
  78. package/dist/es/report.mjs.map +1 -0
  79. package/dist/es/screenshot-item.mjs +105 -0
  80. package/dist/es/screenshot-item.mjs.map +1 -0
  81. package/dist/es/service/index.mjs +256 -0
  82. package/dist/es/service/index.mjs.map +1 -0
  83. package/dist/es/service/utils.mjs +15 -0
  84. package/dist/es/service/utils.mjs.map +1 -0
  85. package/dist/es/skill/index.mjs +38 -0
  86. package/dist/es/skill/index.mjs.map +1 -0
  87. package/dist/es/task-runner.mjs +258 -0
  88. package/dist/es/task-runner.mjs.map +1 -0
  89. package/dist/es/task-timing.mjs +12 -0
  90. package/dist/es/task-timing.mjs.map +1 -0
  91. package/dist/es/tree.mjs +13 -0
  92. package/dist/es/tree.mjs.map +1 -0
  93. package/dist/es/types.mjs +196 -0
  94. package/dist/es/types.mjs.map +1 -0
  95. package/dist/es/utils.mjs +218 -0
  96. package/dist/es/utils.mjs.map +1 -0
  97. package/dist/es/yaml/builder.mjs +13 -0
  98. package/dist/es/yaml/builder.mjs.map +1 -0
  99. package/dist/es/yaml/index.mjs +4 -0
  100. package/dist/es/yaml/player.mjs +418 -0
  101. package/dist/es/yaml/player.mjs.map +1 -0
  102. package/dist/es/yaml/utils.mjs +73 -0
  103. package/dist/es/yaml/utils.mjs.map +1 -0
  104. package/dist/es/yaml.mjs +0 -0
  105. package/dist/lib/agent/agent.js +757 -0
  106. package/dist/lib/agent/agent.js.map +1 -0
  107. package/dist/lib/agent/common.js +5 -0
  108. package/dist/lib/agent/execution-session.js +75 -0
  109. package/dist/lib/agent/execution-session.js.map +1 -0
  110. package/dist/lib/agent/index.js +81 -0
  111. package/dist/lib/agent/index.js.map +1 -0
  112. package/dist/lib/agent/task-builder.js +367 -0
  113. package/dist/lib/agent/task-builder.js.map +1 -0
  114. package/dist/lib/agent/task-cache.js +238 -0
  115. package/dist/lib/agent/task-cache.js.map +1 -0
  116. package/dist/lib/agent/tasks.js +465 -0
  117. package/dist/lib/agent/tasks.js.map +1 -0
  118. package/dist/lib/agent/ui-utils.js +143 -0
  119. package/dist/lib/agent/ui-utils.js.map +1 -0
  120. package/dist/lib/agent/utils.js +275 -0
  121. package/dist/lib/agent/utils.js.map +1 -0
  122. package/dist/lib/ai-model/auto-glm/actions.js +258 -0
  123. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  124. package/dist/lib/ai-model/auto-glm/index.js +66 -0
  125. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  126. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  127. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/auto-glm/planning.js +105 -0
  129. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  131. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/auto-glm/util.js +46 -0
  133. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  134. package/dist/lib/ai-model/conversation-history.js +229 -0
  135. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  136. package/dist/lib/ai-model/index.js +125 -0
  137. package/dist/lib/ai-model/index.js.map +1 -0
  138. package/dist/lib/ai-model/inspect.js +429 -0
  139. package/dist/lib/ai-model/inspect.js.map +1 -0
  140. package/dist/lib/ai-model/llm-planning.js +270 -0
  141. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  142. package/dist/lib/ai-model/prompt/common.js +41 -0
  143. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  144. package/dist/lib/ai-model/prompt/describe.js +100 -0
  145. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  146. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  147. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  148. package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
  149. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  150. package/dist/lib/ai-model/prompt/llm-planning.js +401 -0
  151. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  152. package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
  153. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  154. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  155. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  157. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  158. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  159. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  160. package/dist/lib/ai-model/prompt/util.js +105 -0
  161. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  162. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  163. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  164. package/dist/lib/ai-model/service-caller/index.js +531 -0
  165. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  166. package/dist/lib/ai-model/ui-tars-planning.js +283 -0
  167. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  168. package/dist/lib/common.js +480 -0
  169. package/dist/lib/common.js.map +1 -0
  170. package/dist/lib/device/device-options.js +20 -0
  171. package/dist/lib/device/device-options.js.map +1 -0
  172. package/dist/lib/device/index.js +418 -0
  173. package/dist/lib/device/index.js.map +1 -0
  174. package/dist/lib/dump/html-utils.js +281 -0
  175. package/dist/lib/dump/html-utils.js.map +1 -0
  176. package/dist/lib/dump/image-restoration.js +77 -0
  177. package/dist/lib/dump/image-restoration.js.map +1 -0
  178. package/dist/lib/dump/index.js +60 -0
  179. package/dist/lib/dump/index.js.map +1 -0
  180. package/dist/lib/index.js +146 -0
  181. package/dist/lib/index.js.map +1 -0
  182. package/dist/lib/report-generator.js +172 -0
  183. package/dist/lib/report-generator.js.map +1 -0
  184. package/dist/lib/report.js +145 -0
  185. package/dist/lib/report.js.map +1 -0
  186. package/dist/lib/screenshot-item.js +139 -0
  187. package/dist/lib/screenshot-item.js.map +1 -0
  188. package/dist/lib/service/index.js +290 -0
  189. package/dist/lib/service/index.js.map +1 -0
  190. package/dist/lib/service/utils.js +49 -0
  191. package/dist/lib/service/utils.js.map +1 -0
  192. package/dist/lib/skill/index.js +72 -0
  193. package/dist/lib/skill/index.js.map +1 -0
  194. package/dist/lib/task-runner.js +295 -0
  195. package/dist/lib/task-runner.js.map +1 -0
  196. package/dist/lib/task-timing.js +46 -0
  197. package/dist/lib/task-timing.js.map +1 -0
  198. package/dist/lib/tree.js +53 -0
  199. package/dist/lib/tree.js.map +1 -0
  200. package/dist/lib/types.js +285 -0
  201. package/dist/lib/types.js.map +1 -0
  202. package/dist/lib/utils.js +297 -0
  203. package/dist/lib/utils.js.map +1 -0
  204. package/dist/lib/yaml/builder.js +57 -0
  205. package/dist/lib/yaml/builder.js.map +1 -0
  206. package/dist/lib/yaml/index.js +81 -0
  207. package/dist/lib/yaml/index.js.map +1 -0
  208. package/dist/lib/yaml/player.js +452 -0
  209. package/dist/lib/yaml/player.js.map +1 -0
  210. package/dist/lib/yaml/utils.js +126 -0
  211. package/dist/lib/yaml/utils.js.map +1 -0
  212. package/dist/lib/yaml.js +20 -0
  213. package/dist/lib/yaml.js.map +1 -0
  214. package/dist/types/agent/agent.d.ts +190 -0
  215. package/dist/types/agent/common.d.ts +0 -0
  216. package/dist/types/agent/execution-session.d.ts +36 -0
  217. package/dist/types/agent/index.d.ts +10 -0
  218. package/dist/types/agent/task-builder.d.ts +34 -0
  219. package/dist/types/agent/task-cache.d.ts +48 -0
  220. package/dist/types/agent/tasks.d.ts +70 -0
  221. package/dist/types/agent/ui-utils.d.ts +14 -0
  222. package/dist/types/agent/utils.d.ts +29 -0
  223. package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
  224. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  225. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  226. package/dist/types/ai-model/auto-glm/planning.d.ts +10 -0
  227. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  228. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  229. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  230. package/dist/types/ai-model/index.d.ts +14 -0
  231. package/dist/types/ai-model/inspect.d.ts +58 -0
  232. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  233. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  234. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  235. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  236. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  237. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  238. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  239. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  240. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  241. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  242. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  243. package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
  244. package/dist/types/ai-model/service-caller/index.d.ts +49 -0
  245. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  246. package/dist/types/common.d.ts +288 -0
  247. package/dist/types/device/device-options.d.ts +142 -0
  248. package/dist/types/device/index.d.ts +2315 -0
  249. package/dist/types/dump/html-utils.d.ts +52 -0
  250. package/dist/types/dump/image-restoration.d.ts +6 -0
  251. package/dist/types/dump/index.d.ts +5 -0
  252. package/dist/types/index.d.ts +17 -0
  253. package/dist/types/report-generator.d.ts +48 -0
  254. package/dist/types/report.d.ts +15 -0
  255. package/dist/types/screenshot-item.d.ts +66 -0
  256. package/dist/types/service/index.d.ts +23 -0
  257. package/dist/types/service/utils.d.ts +2 -0
  258. package/dist/types/skill/index.d.ts +25 -0
  259. package/dist/types/task-runner.d.ts +48 -0
  260. package/dist/types/task-timing.d.ts +8 -0
  261. package/dist/types/tree.d.ts +4 -0
  262. package/dist/types/types.d.ts +645 -0
  263. package/dist/types/utils.d.ts +40 -0
  264. package/dist/types/yaml/builder.d.ts +2 -0
  265. package/dist/types/yaml/index.d.ts +4 -0
  266. package/dist/types/yaml/player.d.ts +34 -0
  267. package/dist/types/yaml/utils.d.ts +9 -0
  268. package/dist/types/yaml.d.ts +203 -0
  269. package/package.json +111 -0
@@ -0,0 +1,645 @@
1
+ import type { NodeType } from '@midscene/shared/constants';
2
+ import type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';
3
+ import type { BaseElement, LocateResultElement, Rect, Size } from '@midscene/shared/types';
4
+ import type { z } from 'zod';
5
+ import type { TUserPrompt } from './common';
6
+ import { ScreenshotItem } from './screenshot-item';
7
+ import type { DetailedLocateParam, MidsceneYamlFlowItem, ServiceExtractOption } from './yaml';
8
+ export type { ElementTreeNode, BaseElement, Rect, Size, Point, } from '@midscene/shared/types';
9
+ export * from './yaml';
10
+ export type AIUsageInfo = Record<string, any> & { // AI usage record: an open string-keyed bag intersected with the named fields below; each named field must be present but may be undefined.
11
+ prompt_tokens: number | undefined;
12
+ completion_tokens: number | undefined;
13
+ total_tokens: number | undefined;
14
+ cached_input: number | undefined;
15
+ time_cost: number | undefined; // NOTE(review): unit is not stated in this file (presumably milliseconds) — confirm against the producer.
16
+ model_name: string | undefined;
17
+ model_description: string | undefined;
18
+ intent: string | undefined;
19
+ request_id: string | undefined;
20
+ };
21
+ export type { LocateResultElement };
22
+ export type AISingleElementResponseByPosition = { // Single-element answer carrying an optional point (`position`) and/or an optional 4-number `bbox`, plus mandatory `reason` and `text`.
23
+ position?: {
24
+ x: number;
25
+ y: number;
26
+ };
27
+ bbox?: [number, number, number, number]; // NOTE(review): tuple order (x1,y1,x2,y2 vs x,y,w,h) is not defined in this file — confirm before use.
28
+ reason: string;
29
+ text: string;
30
+ };
31
+ export interface AIElementCoordinatesResponse { // Element-location answer: a required 4-number bbox plus an optional list of error strings.
32
+ bbox: [number, number, number, number];
33
+ errors?: string[];
34
+ }
35
+ export type AIElementResponse = AIElementCoordinatesResponse; // Plain alias; the two names are interchangeable.
36
+ export interface AIDataExtractionResponse<DataDemand> { // Extraction answer whose `data` has the caller-chosen shape DataDemand.
37
+ data: DataDemand;
38
+ errors?: string[];
39
+ thought?: string;
40
+ }
41
+ export interface AISectionLocatorResponse { // Section-location answer: one required bbox, an optional list of reference bboxes, and an optional error.
42
+ bbox: [number, number, number, number];
43
+ references_bbox?: [number, number, number, number][];
44
+ error?: string; // Singular string here, whereas AIElementCoordinatesResponse exposes `errors?: string[]`.
45
+ }
46
+ export interface AIAssertionResponse { // Assertion verdict: a boolean `pass` flag plus a mandatory `thought` string.
47
+ pass: boolean;
48
+ thought: string;
49
+ }
50
+ export interface AIDescribeElementResponse { // Describe-element answer: a mandatory description and an optional error string.
51
+ description: string;
52
+ error?: string;
53
+ }
54
+ export interface LocatorValidatorOption { // Options bag with a single optional numeric threshold.
55
+ centerDistanceThreshold?: number; // NOTE(review): unit (presumably pixels) and default value are not defined in this file — confirm.
56
+ }
57
+ export interface LocateValidatorResult { // Validation outcome: verdict together with a rect, a 2-number center and an optional distance.
58
+ pass: boolean;
59
+ rect: Rect;
60
+ center: [number, number]; // NOTE(review): presumably [x, y]; the order is not stated here.
61
+ centerDistance?: number;
62
+ }
63
+ export interface AgentDescribeElementAtPointResult { // Result shape: a prompt string, a deepLocate flag and an optional LocateValidatorResult.
64
+ prompt: string;
65
+ deepLocate: boolean;
66
+ verifyResult?: LocateValidatorResult; // Optional; callers must handle its absence.
67
+ }
68
+ /**
69
+ * context: abstract description of the UI state (screenshot, its size, and coordinate ratio)
70
+ */
71
+ export declare abstract class UIContext {
72
+ /**
73
+ * Screenshot of the current UI state. Its size is `shotSize`, i.e. it has already been shrunk by screenshotShrinkFactor.
74
+ */
75
+ abstract screenshot: ScreenshotItem;
76
+ /**
77
+ * Size of the screenshot after shrinking.
78
+ */
79
+ abstract shotSize: Size;
80
+ /**
81
+ * The ratio for converting shrunk screenshot coordinates to logical coordinates.
82
+ *
83
+ * Example:
84
+ * - Physical screen width: 3000px, dpr=6
85
+ * - Logical width: 500px
86
+ * - User-defined screenshotShrinkFactor: 2
87
+ * - Actual shrunk screenshot width: 3000 / 2 = 1500px
88
+ * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3
89
+ * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px
90
+ */
91
+ abstract shrunkShotToLogicalRatio: number;
92
+ abstract _isFrozen?: boolean; // NOTE(review): undocumented optional flag; its meaning is not defined in this file.
93
+ abstract deprecatedDpr?: number; // NOTE(review): the name suggests a legacy device-pixel-ratio field — confirm whether new code should read it.
94
+ }
95
+ export type EnsureObject<T> = { // Mapped type over the keys of T with every property value widened to `any`.
96
+ [K in keyof T]: any;
97
+ };
98
+ export type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe'; // Four action names; note that ServiceDump.type accepts only the first three.
99
+ export type ServiceExtractParam = string | Record<string, string>; // Either a single string or a string-to-string map.
100
+ export type ElementCacheFeature = Record<string, unknown>; // Opaque string-keyed record; values must be narrowed before use.
101
+ export interface LocateResult { // `element` is always present but may be null; `rect` is optional.
102
+ element: LocateResultElement | null;
103
+ rect?: Rect;
104
+ }
105
+ export type ThinkingLevel = 'off' | 'medium' | 'high'; // Three literal levels; there is no 'low' member.
106
+ export type DeepThinkOption = 'unset' | true | false; // Tri-state: the string 'unset' or a boolean.
107
+ export interface ServiceTaskInfo { // Per-task metadata; only `durationMs` is required.
108
+ durationMs: number;
109
+ formatResponse?: string;
110
+ rawResponse?: string;
111
+ usage?: AIUsageInfo;
112
+ searchArea?: Rect;
113
+ searchAreaRawResponse?: string; // The searchArea* fields mirror rawResponse/usage for what is presumably a separate search-area step — confirm.
114
+ searchAreaUsage?: AIUsageInfo;
115
+ reasoning_content?: string;
116
+ }
117
+ export interface DumpMeta { // Base shape extended by ServiceDump and IExecutionDump.
118
+ logTime: number; // NOTE(review): presumably an epoch timestamp; the unit is not stated in this file.
119
+ }
120
+ export interface ReportDumpWithAttributes { // A dump in string form paired with an optional free-form attribute map.
121
+ dumpString: string;
122
+ attributes?: Record<string, any>;
123
+ }
124
+ export interface ServiceDump extends DumpMeta { // Dump of one service call; inherits `logTime` from DumpMeta.
125
+ type: 'locate' | 'extract' | 'assert'; // Narrower than ServiceAction: 'describe' is not accepted here.
126
+ logId: string;
127
+ userQuery: { // All three members are optional; the type does not enforce which one accompanies which `type`.
128
+ element?: TUserPrompt;
129
+ dataDemand?: ServiceExtractParam;
130
+ assertion?: TUserPrompt;
131
+ };
132
+ matchedElement: LocateResultElement[];
133
+ matchedRect?: Rect;
134
+ deepLocate?: boolean;
135
+ data: any; // Required but untyped.
136
+ assertionPass?: boolean;
137
+ assertionThought?: string;
138
+ taskInfo: ServiceTaskInfo;
139
+ error?: string;
140
+ output?: any;
141
+ }
142
+ export type PartialServiceDumpFromSDK = Omit<ServiceDump, 'logTime' | 'logId'>; // ServiceDump without `logTime` and `logId`. The former 'model_name' entry was dropped: it is not a key of ServiceDump or DumpMeta, so omitting it had no effect and the resulting type is unchanged.
143
+ export interface ServiceResultBase { // Common base for service results: every result carries its ServiceDump.
144
+ dump: ServiceDump;
145
+ }
146
+ export type LocateResultWithDump = LocateResult & ServiceResultBase; // LocateResult fields plus the mandatory `dump`.
147
+ export interface ServiceExtractResult<T> extends ServiceResultBase { // Extraction result: typed `data` plus the inherited `dump`; the remaining fields are optional.
148
+ data: T;
149
+ thought?: string;
150
+ usage?: AIUsageInfo;
151
+ reasoning_content?: string;
152
+ }
153
+ export declare class ServiceError extends Error { // Error subclass that exposes a ServiceDump on the instance.
154
+ dump: ServiceDump;
155
+ constructor(message: string, dump: ServiceDump); // Both arguments are required.
156
+ }
157
+ export interface LiteUISection { // Section descriptor made of three required strings and a required list of string ids.
158
+ name: string;
159
+ description: string;
160
+ sectionCharacteristics: string;
161
+ textIds: string[]; // NOTE(review): what these ids refer to is not defined in this file.
162
+ }
163
+ export type ElementById = (id: string) => BaseElement | null; // Lookup function type: returns a BaseElement or null.
164
+ export type ServiceAssertionResponse = AIAssertionResponse & { // AIAssertionResponse (`pass`, `thought`) extended with optional usage info.
165
+ usage?: AIUsageInfo;
166
+ };
167
+ /**
168
+ * agent: option and callback types
169
+ */
170
+ export type OnTaskStartTip = (tip: string) => Promise<void> | void; // Callback may be sync or async; its return value carries no data.
171
+ export interface AgentWaitForOpt extends ServiceExtractOption { // ServiceExtractOption plus two optional millisecond settings (unit per the `Ms` suffix).
172
+ checkIntervalMs?: number; // NOTE(review): default value is not defined in this file.
173
+ timeoutMs?: number; // NOTE(review): default value is not defined in this file.
174
+ }
175
+ export interface AgentAssertOpt { // Options bag with a single optional flag.
176
+ keepRawResponse?: boolean;
177
+ }
178
+ /**
179
+ * planning: types describing planned actions and planner responses
180
+ *
181
+ */
182
+ export interface PlanningLocateParam extends DetailedLocateParam { // DetailedLocateParam plus an optional 4-number bbox.
183
+ bbox?: [number, number, number, number]; // NOTE(review): tuple order is not defined in this file.
184
+ }
185
+ export interface PlanningAction<ParamType = any> { // One planned action; `type` and `param` are required, and ParamType defaults to `any`.
186
+ thought?: string;
187
+ log?: string;
188
+ type: string; // Free-form string, not a closed union.
189
+ param: ParamType;
190
+ }
191
+ export type SubGoalStatus = 'pending' | 'running' | 'finished'; // Three states; there is no failed or cancelled member (compare ExecutionTask.status).
192
+ export interface SubGoal { // A sub-goal entry: numeric index, status, description and optional log lines.
193
+ index: number; // NOTE(review): whether this is 0- or 1-based is not stated here.
194
+ status: SubGoalStatus;
195
+ description: string;
196
+ logs?: string[];
197
+ }
198
+ export interface RawResponsePlanningAIResponse { // Planner response shape with a single `action`; only `action` and `log` are required.
199
+ action: PlanningAction;
200
+ thought?: string;
201
+ log: string;
202
+ memory?: string;
203
+ error?: string;
204
+ finalizeMessage?: string;
205
+ finalizeSuccess?: boolean;
206
+ updateSubGoals?: SubGoal[];
207
+ markFinishedIndexes?: number[]; // NOTE(review): presumably refers to SubGoal.index values — confirm.
208
+ }
209
+ export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> { // The raw response minus `action`, replaced by an optional `actions` array plus extra bookkeeping fields.
210
+ actions?: PlanningAction[];
211
+ usage?: AIUsageInfo;
212
+ rawResponse?: string;
213
+ yamlFlow?: MidsceneYamlFlowItem[];
214
+ yamlString?: string;
215
+ error?: string; // Already inherited from RawResponsePlanningAIResponse with the same type; this redeclaration is redundant but harmless.
216
+ reasoning_content?: string;
217
+ shouldContinuePlanning: boolean; // The only required field added by this interface.
218
+ output?: string;
219
+ }
220
+ export interface PlanningActionParamSleep { // Parameter object with a required duration (milliseconds per the `Ms` suffix).
221
+ timeMs: number;
222
+ }
223
+ export interface PlanningActionParamError { // Parameter object with a required `thought` string.
224
+ thought: string;
225
+ }
226
+ export type PlanningActionParamWaitFor = AgentWaitForOpt & {}; // Intersection with the empty object type adds no members; structurally this is AgentWaitForOpt.
227
+ export interface LongPressParam { // Options bag with one optional numeric duration.
228
+ duration?: number; // NOTE(review): unit (presumably milliseconds) is not stated in this file.
229
+ }
230
+ export interface PullParam { // Pull parameters: a required vertical direction and optional distance and duration.
231
+ direction: 'up' | 'down'; // Only vertical directions are allowed by the type.
232
+ distance?: number; // NOTE(review): unit is not stated in this file.
233
+ duration?: number; // NOTE(review): unit is not stated in this file.
234
+ }
235
+ /**
236
+ * misc: small shared helper shapes
237
+ */
238
+ export interface Color { // A named color; both fields are required strings.
239
+ name: string;
240
+ hex: string; // NOTE(review): expected format (e.g. leading '#') is not defined in this file.
241
+ }
242
+ export interface BaseAgentParserOpt { // Shared parser options: a single optional selector string.
243
+ selector?: string;
244
+ }
245
+ export interface PuppeteerParserOpt extends BaseAgentParserOpt { // Adds no members; structurally identical to BaseAgentParserOpt.
246
+ }
247
+ export interface PlaywrightParserOpt extends BaseAgentParserOpt { // Adds no members; structurally identical to BaseAgentParserOpt.
248
+ }
249
+ export interface ExecutionTaskProgressOptions { // Options bag holding one optional callback.
250
+ onTaskStart?: (task: ExecutionTask) => Promise<void> | void; // May be sync or async; receives the task.
251
+ }
252
+ export interface ExecutionRecorderItem { // Recorder entry; the only permitted `type` is 'screenshot'.
253
+ type: 'screenshot';
254
+ ts: number; // NOTE(review): presumably a timestamp; the unit is not stated in this file.
255
+ screenshot?: ScreenshotItem; // Optional even though `type` is always 'screenshot'.
256
+ timing?: string;
257
+ }
258
+ export type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log'; // Note that 'Action Space' contains a space.
259
+ export interface ExecutorContext { // Second argument passed to ExecutionTaskApply.executor and DeviceAction.call.
260
+ task: ExecutionTask;
261
+ element?: LocateResultElement | null; // May be absent or explicitly null.
262
+ uiContext?: UIContext;
263
+ }
264
+ export interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> { // Task definition: a type tag, optional metadata and a required executor; all four type parameters default to `any`.
265
+ type: Type;
266
+ subType?: string;
267
+ param?: TaskParam; // Optional here, although `executor` declares its `param` argument as TaskParam.
268
+ thought?: string;
269
+ uiContext?: UIContext;
270
+ executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void> | undefined | void; // May be sync or async and may return nothing at all.
271
+ }
272
+ export interface ExecutionTaskHitBy { // Source tag plus a free-form context map.
273
+ from: string; // NOTE(review): the set of allowed values is not defined in this file.
274
+ context: Record<string, any>;
275
+ }
276
+ export interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> { // Value an executor may return; every field is optional, and the type parameters default to `unknown`.
277
+ output?: TaskOutput;
278
+ log?: TaskLog;
279
+ recorder?: ExecutionRecorderItem[];
280
+ hitBy?: ExecutionTaskHitBy;
281
+ }
282
+ export type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & { // The task definition E, intersected with its return fields (output/log types inferred from E, else unknown) and the run-state fields below.
283
+ taskId: string;
284
+ status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
285
+ error?: Error;
286
+ errorMessage?: string;
287
+ errorStack?: string;
288
+ timing?: { // Only `start` is required; every other mark is optional. NOTE(review): units are not stated (presumably epoch milliseconds).
289
+ start: number;
290
+ getUiContextStart?: number;
291
+ getUiContextEnd?: number;
292
+ callAiStart?: number;
293
+ callAiEnd?: number;
294
+ beforeInvokeActionHookStart?: number;
295
+ beforeInvokeActionHookEnd?: number;
296
+ callActionStart?: number;
297
+ callActionEnd?: number;
298
+ afterInvokeActionHookStart?: number;
299
+ afterInvokeActionHookEnd?: number;
300
+ captureAfterCallingSnapshotStart?: number;
301
+ captureAfterCallingSnapshotEnd?: number;
302
+ end?: number;
303
+ cost?: number; // NOTE(review): presumably end - start; not established in this file.
304
+ };
305
+ usage?: AIUsageInfo;
306
+ searchAreaUsage?: AIUsageInfo;
307
+ reasoning_content?: string;
308
+ };
309
+ export interface IExecutionDump extends DumpMeta { // Plain-object shape of an execution dump; inherits `logTime` from DumpMeta.
310
+ name: string;
311
+ description?: string;
312
+ tasks: ExecutionTask[];
313
+ aiActContext?: string;
314
+ }
315
+ /**
316
+ * ExecutionDump class for serializing and deserializing execution dumps.
317
+ */
318
+ export declare class ExecutionDump implements IExecutionDump {
319
+ logTime: number;
320
+ name: string;
321
+ description?: string;
322
+ tasks: ExecutionTask[];
323
+ aiActContext?: string;
324
+ constructor(data: IExecutionDump); // Takes the plain-object shape; instance fields mirror IExecutionDump.
325
+ /**
326
+ * Serialize the ExecutionDump to a JSON string.
327
+ */
328
+ serialize(indents?: number): string; // NOTE(review): `indents` is presumably forwarded as the JSON indentation width — confirm in the implementation.
329
+ /**
330
+ * Convert to a plain object for JSON serialization.
331
+ */
332
+ toJSON(): IExecutionDump;
333
+ /**
334
+ * Create an ExecutionDump instance from a serialized JSON string.
335
+ */
336
+ static fromSerializedString(serialized: string): ExecutionDump;
337
+ /**
338
+ * Create an ExecutionDump instance from a plain object.
339
+ */
340
+ static fromJSON(data: IExecutionDump): ExecutionDump;
341
+ /**
342
+ * Collect all ScreenshotItem instances from tasks.
343
+ * Scans through uiContext and recorder items to find screenshots.
344
+ *
345
+ * @returns Array of ScreenshotItem instances
346
+ */
347
+ collectScreenshots(): ScreenshotItem[];
348
+ }
349
+ export type ExecutionTaskInsightLocateParam = PlanningLocateParam; // Plain alias.
350
+ export interface ExecutionTaskInsightLocateOutput { // Locate output: `element` is required but may be null.
351
+ element: LocateResultElement | null;
352
+ }
353
+ export type ExecutionTaskInsightDump = ServiceDump; // Plain alias; used as the TaskLog type of the Insight tasks below.
354
+ export type ExecutionTaskInsightLocateApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightLocateParam, ExecutionTaskInsightLocateOutput, ExecutionTaskInsightDump>;
355
+ export type ExecutionTaskInsightLocate = ExecutionTask<ExecutionTaskInsightLocateApply>;
356
+ export interface ExecutionTaskInsightQueryParam { // Query parameter: the data demand to extract.
357
+ dataDemand: ServiceExtractParam;
358
+ }
359
+ export interface ExecutionTaskInsightQueryOutput { // Declared here but not referenced by ExecutionTaskInsightQueryApply, which uses `any` as its output type.
360
+ data: any;
361
+ }
362
+ export type ExecutionTaskInsightQueryApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightQueryParam, any, ExecutionTaskInsightDump>; // Output type is `any`, not ExecutionTaskInsightQueryOutput.
363
+ export type ExecutionTaskInsightQuery = ExecutionTask<ExecutionTaskInsightQueryApply>;
364
+ export interface ExecutionTaskInsightAssertionParam { // Assertion parameter: a plain string.
365
+ assertion: string;
366
+ }
367
+ export type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightAssertionParam, ServiceAssertionResponse, ExecutionTaskInsightDump>;
368
+ export type ExecutionTaskInsightAssertion = ExecutionTask<ExecutionTaskInsightAssertionApply>;
369
+ export type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<'Action Space', ActionParam, void, void>; // 'Action Space' task: output and log types are both `void`.
370
+ export type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;
371
+ export type ExecutionTaskLogApply<LogParam = { // 'Log' task; the default param is an object with a single `content` string.
372
+ content: string;
373
+ }> = ExecutionTaskApply<'Log', LogParam, void, void>;
374
+ export type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;
375
+ export type ExecutionTaskPlanningApply = ExecutionTaskApply<'Planning', { // 'Planning' task; the fourth type argument (TaskLog) is left at its `any` default.
376
+ userInstruction: string;
377
+ aiActContext?: string;
378
+ }, PlanningAIResponse>;
379
+ export type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;
380
+ export type ExecutionTaskPlanningLocateParam = PlanningLocateParam; // Plain alias.
381
+ export interface ExecutionTaskPlanningLocateOutput { // Same shape as ExecutionTaskInsightLocateOutput.
382
+ element: LocateResultElement | null;
383
+ }
384
+ export type ExecutionTaskPlanningDump = ServiceDump; // Plain alias.
385
+ export type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<'Planning', ExecutionTaskPlanningLocateParam, ExecutionTaskPlanningLocateOutput, ExecutionTaskPlanningDump>;
386
+ export type ExecutionTaskPlanningLocate = ExecutionTask<ExecutionTaskPlanningLocateApply>;
387
+ export interface IGroupedActionDump { // Plain-object shape of a grouped dump: group metadata plus a list of execution dumps.
388
+ sdkVersion: string;
389
+ groupName: string;
390
+ groupDescription?: string;
391
+ modelBriefs: string[];
392
+ executions: IExecutionDump[];
393
+ deviceType?: string;
394
+ }
395
+ /**
396
+ * GroupedActionDump class for serializing and deserializing grouped action dumps.
397
+ */
398
+ export declare class GroupedActionDump implements IGroupedActionDump {
399
+ sdkVersion: string;
400
+ groupName: string;
401
+ groupDescription?: string;
402
+ modelBriefs: string[];
403
+ executions: ExecutionDump[]; // Narrower than the interface: class instances rather than plain IExecutionDump objects.
404
+ deviceType?: string;
405
+ constructor(data: IGroupedActionDump);
406
+ /**
407
+ * Serialize the GroupedActionDump to a JSON string.
408
+ * Uses the compact { $screenshot: id } format for screenshots.
409
+ */
410
+ serialize(indents?: number): string;
411
+ /**
412
+ * Serialize the GroupedActionDump with inline screenshots to a JSON string.
413
+ * Each ScreenshotItem is replaced with { base64: "...", capturedAt }.
414
+ */
415
+ serializeWithInlineScreenshots(indents?: number): string;
416
+ /**
417
+ * Convert to a plain object for JSON serialization.
418
+ */
419
+ toJSON(): IGroupedActionDump;
420
+ /**
421
+ * Create a GroupedActionDump instance from a serialized JSON string.
422
+ */
423
+ static fromSerializedString(serialized: string): GroupedActionDump;
424
+ /**
425
+ * Create a GroupedActionDump instance from a plain object.
426
+ */
427
+ static fromJSON(data: IGroupedActionDump): GroupedActionDump;
428
+ /**
429
+ * Collect all ScreenshotItem instances from all executions.
430
+ *
431
+ * @returns Array of all ScreenshotItem instances across all executions
432
+ */
433
+ collectAllScreenshots(): ScreenshotItem[];
434
+ /**
435
+ * Serialize the dump to files with screenshots as separate PNG files.
436
+ * Creates:
437
+ * - {basePath} - dump JSON with { $screenshot: id } references
438
+ * - {basePath}.screenshots/ - PNG files
439
+ * - {basePath}.screenshots.json - ID to path mapping
440
+ *
441
+ * @param basePath - Base path for the dump file
442
+ */
443
+ serializeToFiles(basePath: string): void; // Declared with a `void` return, i.e. not promise-based.
444
+ /**
445
+ * Read dump from files and return JSON string with inline screenshots.
446
+ * Reads the dump JSON and screenshot files, then inlines the base64 data.
447
+ *
448
+ * @param basePath - Base path for the dump file
449
+ * @returns JSON string with inline screenshots ({ base64: "..." } format)
450
+ */
451
+ static fromFilesAsInlineJson(basePath: string): string; // Returns a JSON string, not a GroupedActionDump instance.
452
+ /**
453
+ * Clean up all files associated with a serialized dump.
454
+ *
455
+ * @param basePath - Base path for the dump file
456
+ */
457
+ static cleanupFiles(basePath: string): void;
458
+ /**
459
+ * Get all file paths associated with a serialized dump.
460
+ *
461
+ * @param basePath - Base path for the dump file
462
+ * @returns Array of all associated file paths
463
+ */
464
+ static getFilePaths(basePath: string): string[];
465
+ }
466
+ export type InterfaceType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android' | string;
467
+ export interface StreamingCodeGenerationOptions {
468
+ /** Whether to enable streaming output */
469
+ stream?: boolean;
470
+ /** Callback function to handle streaming chunks */
471
+ onChunk?: StreamingCallback;
472
+ /** Callback function to handle streaming completion */
473
+ onComplete?: (finalCode: string) => void;
474
+ /** Callback function to handle streaming errors */
475
+ onError?: (error: Error) => void;
476
+ }
477
+ export type StreamingCallback = (chunk: CodeGenerationChunk) => void;
478
+ export interface CodeGenerationChunk {
479
+ /** The incremental content chunk */
480
+ content: string;
481
+ /** The reasoning content */
482
+ reasoning_content: string;
483
+ /** The accumulated content so far */
484
+ accumulated: string;
485
+ /** Whether this is the final chunk */
486
+ isComplete: boolean;
487
+ /** Token usage information if available */
488
+ usage?: AIUsageInfo;
489
+ }
490
+ export interface StreamingAIResponse {
491
+ /** The final accumulated content */
492
+ content: string;
493
+ /** Token usage information */
494
+ usage?: AIUsageInfo;
495
+ /** Whether the response was streamed */
496
+ isStreamed: boolean;
497
+ }
498
+ export interface DeviceAction<TParam = any, TReturn = any> {
499
+ name: string;
500
+ description?: string;
501
+ interfaceAlias?: string;
502
+ paramSchema?: z.ZodType<TParam>; // Zod schema typed to TParam
503
+ call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn; // may return synchronously or via a Promise
504
+ delayAfterRunner?: number;
505
+ /**
506
+ * An example param object for this action; its keys mirror those of TParam.
507
+ * Locate fields given as { prompt } automatically get a bbox injected when needed.
508
+ */
509
+ sample?: {
510
+ [K in keyof TParam]?: any;
511
+ };
512
+ }
513
+ /**
514
+ * Type utilities for extracting types from DeviceAction definitions
515
+ */
516
+ /**
517
+ * Extract parameter type from a DeviceAction
518
+ */
519
+ export type ActionParam<Action extends DeviceAction<any, any>> = Action extends DeviceAction<infer P, any> ? P : never;
520
+ /**
521
+ * Extract return type from a DeviceAction
522
+ */
523
+ export type ActionReturn<Action extends DeviceAction<any, any>> = Action extends DeviceAction<any, infer R> ? R : never;
524
+ /**
525
+ * Web-specific types
526
+ */
527
+ export interface WebElementInfo extends BaseElement {
528
+ id: string;
529
+ attributes: {
530
+ nodeType: NodeType;
531
+ [key: string]: string;
532
+ };
533
+ }
534
+ /**
535
+ * Agent
536
+ */
537
+ export type CacheConfig = {
538
+ strategy?: 'read-only' | 'read-write' | 'write-only';
539
+ id: string;
540
+ };
541
+ export type Cache = false | true | CacheConfig;
542
+ export interface AgentOpt {
543
+ testId?: string;
544
+ cacheId?: string;
545
+ groupName?: string;
546
+ groupDescription?: string;
547
+ generateReport?: boolean;
548
+ autoPrintReportMsg?: boolean;
549
+ /**
550
+ * Report output format. 'html-and-external-assets' is a directory-based format with separate image files.
551
+ *
552
+ * With 'html-and-external-assets':
553
+ * - Screenshots are saved as PNG files in a `screenshots/` subdirectory
554
+ * - Report is generated as `index.html` with relative image paths
555
+ * - Reduces memory usage and report file size
556
+ *
557
+ * IMPORTANT: 'html-and-external-assets' reports must be served via an HTTP server
558
+ * (e.g., `npx serve ./report-dir`). The file:// protocol will not
559
+ * work due to browser CORS restrictions.
560
+ *
561
+ * @default 'single-html'
562
+ */
563
+ outputFormat?: 'single-html' | 'html-and-external-assets';
564
+ onTaskStartTip?: OnTaskStartTip;
565
+ aiActContext?: string;
566
+ aiActionContext?: string;
567
+ reportFileName?: string;
568
+ modelConfig?: TModelConfig;
569
+ cache?: Cache;
570
+ /**
571
+ * Maximum number of replanning cycles for aiAct.
572
+ * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.
573
+ * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
574
+ */
575
+ replanningCycleLimit?: number;
576
+ /**
577
+ * Wait time in milliseconds after each action execution.
578
+ * This allows the UI to settle and stabilize before the next action.
579
+ * Defaults to 300ms when not provided.
580
+ */
581
+ waitAfterAction?: number;
582
+ /**
583
+ * When set to true, Midscene will use the target device's time (Android/iOS)
584
+ * instead of the system time. Useful when the device time differs from the
585
+ * host machine. Default: false
586
+ */
587
+ useDeviceTimestamp?: boolean;
588
+ /**
589
+ * Custom screenshot shrink factor to reduce AI token usage.
590
+ * When set, the screenshot will be scaled down by this factor from the physical resolution.
591
+ *
592
+ * Example:
593
+ * - Physical screen width: 3000px, dpr=6
594
+ * - Logical width: 500px
595
+ * - screenshotShrinkFactor: 2
596
+ * - Actual shrunk screenshot width: 3000 / 2 = 1500px
597
+ * - AI analyzes the 1500px screenshot
598
+ * - Coordinates are transformed back to logical (500px) before actions execute
599
+ *
600
+ * Benefits:
601
+ * - Reduces token usage for high-resolution screenshots
602
+ * - Maintains accuracy by scaling coordinates appropriately
603
+ *
604
+ * Must be >= 1 (shrinking only, enlarging is not supported).
605
+ *
606
+ * @default 1 (no shrinking, uses original physical screenshot)
607
+ */
608
+ screenshotShrinkFactor?: number;
609
+ /**
610
+ * Custom OpenAI client factory function
611
+ *
612
+ * If provided, this function will be called to create OpenAI client instances
613
+ * for each AI call, allowing you to:
614
+ * - Wrap clients with observability tools (langsmith, langfuse)
615
+ * - Use custom OpenAI-compatible clients
616
+ * - Apply different configurations based on intent
617
+ *
618
+ * @param config - Resolved model configuration (NOTE(review): the example below names the arguments `openai` and `opts`; confirm against CreateOpenAIClientFn)
619
+ * @returns OpenAI client instance (original or wrapped)
620
+ *
621
+ * @example
622
+ * ```typescript
623
+ * createOpenAIClient: async (openai, opts) => {
624
+ * // Wrap with langsmith for planning tasks
625
+ * if (opts.baseURL?.includes('planning')) {
626
+ * return wrapOpenAI(openai, { metadata: { task: 'planning' } });
627
+ * }
628
+ *
629
+ * return openai;
630
+ * }
631
+ * ```
632
+ */
633
+ createOpenAIClient?: CreateOpenAIClientFn;
634
+ }
635
+ export type TestStatus = 'passed' | 'failed' | 'timedOut' | 'skipped' | 'interrupted';
636
+ export interface ReportFileWithAttributes {
637
+ reportFilePath: string;
638
+ reportAttributes: {
639
+ testDuration: number;
640
+ testStatus: TestStatus;
641
+ testTitle: string;
642
+ testId: string;
643
+ testDescription: string;
644
+ };
645
+ }
@@ -0,0 +1,40 @@
1
+ import type { Cache, Rect, ReportDumpWithAttributes } from './types';
2
+ export { appendFileSync } from 'node:fs';
3
+ export declare const groupedActionDumpFileExt = "web-dump.json";
4
+ /**
5
+ * Process cache configuration with environment variable support and backward compatibility.
6
+ *
7
+ * @param cache - The original cache configuration
8
+ * @param cacheId - The cache ID to use as:
9
+ * 1. Fallback ID when cache is true or cache object has no ID
10
+ * 2. Legacy cacheId when cache is undefined (requires MIDSCENE_CACHE env var)
11
+ * @returns Processed cache configuration
12
+ */
13
+ export declare function processCacheConfig(cache: Cache | undefined, cacheId: string): Cache | undefined;
14
+ export declare function getReportTpl(): string;
15
+ /**
16
+ * high performance, insert script before </html> in HTML file
17
+ * only truncate and append, no temporary file
18
+ */
19
+ export declare function insertScriptBeforeClosingHtml(filePath: string, scriptContent: string): void;
20
+ export declare function reportHTMLContent(dumpData: string | ReportDumpWithAttributes, reportPath?: string, appendReport?: boolean, withTpl?: boolean): string;
21
+ export declare function writeDumpReport(fileName: string, dumpData: string | ReportDumpWithAttributes, appendReport?: boolean): string | null;
22
+ export declare function writeLogFile(opts: {
23
+ fileName: string;
24
+ fileExt: string;
25
+ fileContent: string | ReportDumpWithAttributes;
26
+ type: 'dump' | 'cache' | 'report' | 'tmp';
27
+ generateReport?: boolean;
28
+ appendReport?: boolean;
29
+ }): string | null;
30
+ export declare function getTmpDir(): string | null;
31
+ export declare function getTmpFile(fileExtWithoutDot: string): string | null;
32
+ export declare function overlapped(container: Rect, target: Rect): boolean;
33
+ export declare function sleep(ms: number): Promise<unknown>;
34
+ export declare function replacerForPageObject(_key: string, value: any): any;
35
+ export declare function stringifyDumpData(data: any, indents?: number): string;
36
+ export declare function getVersion(): string;
37
+ export declare function uploadTestInfoToServer({ testUrl, serverUrl, }: {
38
+ testUrl: string;
39
+ serverUrl?: string;
40
+ }): Promise<void>;
@@ -0,0 +1,2 @@
1
+ import type { MidsceneYamlScriptWebEnv, MidsceneYamlTask } from '../types';
2
+ export declare function buildYaml(env: MidsceneYamlScriptWebEnv, tasks: MidsceneYamlTask[]): string;
@@ -0,0 +1,4 @@
1
+ export * from '../yaml';
2
+ export * from './player';
3
+ export * from './builder';
4
+ export * from './utils';