@donggui/core 1.5.4-donggui.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +709 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/execution-session.mjs +41 -0
  7. package/dist/es/agent/execution-session.mjs.map +1 -0
  8. package/dist/es/agent/index.mjs +6 -0
  9. package/dist/es/agent/task-builder.mjs +330 -0
  10. package/dist/es/agent/task-builder.mjs.map +1 -0
  11. package/dist/es/agent/task-cache.mjs +186 -0
  12. package/dist/es/agent/task-cache.mjs.map +1 -0
  13. package/dist/es/agent/tasks.mjs +422 -0
  14. package/dist/es/agent/tasks.mjs.map +1 -0
  15. package/dist/es/agent/ui-utils.mjs +91 -0
  16. package/dist/es/agent/ui-utils.mjs.map +1 -0
  17. package/dist/es/agent/utils.mjs +198 -0
  18. package/dist/es/agent/utils.mjs.map +1 -0
  19. package/dist/es/ai-model/auto-glm/actions.mjs +224 -0
  20. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  21. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  22. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  23. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  24. package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
  25. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  26. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  27. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  28. package/dist/es/ai-model/auto-glm/util.mjs +9 -0
  29. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  30. package/dist/es/ai-model/conversation-history.mjs +195 -0
  31. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  32. package/dist/es/ai-model/index.mjs +11 -0
  33. package/dist/es/ai-model/inspect.mjs +386 -0
  34. package/dist/es/ai-model/inspect.mjs.map +1 -0
  35. package/dist/es/ai-model/llm-planning.mjs +233 -0
  36. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  37. package/dist/es/ai-model/prompt/common.mjs +7 -0
  38. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  39. package/dist/es/ai-model/prompt/describe.mjs +66 -0
  40. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  41. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  42. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  43. package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
  44. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  45. package/dist/es/ai-model/prompt/llm-planning.mjs +364 -0
  46. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  47. package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
  48. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  49. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  50. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  52. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  53. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  54. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  55. package/dist/es/ai-model/prompt/util.mjs +59 -0
  56. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  57. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  58. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  59. package/dist/es/ai-model/service-caller/index.mjs +466 -0
  60. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  61. package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
  62. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  63. package/dist/es/common.mjs +371 -0
  64. package/dist/es/common.mjs.map +1 -0
  65. package/dist/es/device/device-options.mjs +0 -0
  66. package/dist/es/device/index.mjs +300 -0
  67. package/dist/es/device/index.mjs.map +1 -0
  68. package/dist/es/dump/html-utils.mjs +211 -0
  69. package/dist/es/dump/html-utils.mjs.map +1 -0
  70. package/dist/es/dump/image-restoration.mjs +43 -0
  71. package/dist/es/dump/image-restoration.mjs.map +1 -0
  72. package/dist/es/dump/index.mjs +3 -0
  73. package/dist/es/index.mjs +15 -0
  74. package/dist/es/index.mjs.map +1 -0
  75. package/dist/es/report-generator.mjs +134 -0
  76. package/dist/es/report-generator.mjs.map +1 -0
  77. package/dist/es/report.mjs +111 -0
  78. package/dist/es/report.mjs.map +1 -0
  79. package/dist/es/screenshot-item.mjs +105 -0
  80. package/dist/es/screenshot-item.mjs.map +1 -0
  81. package/dist/es/service/index.mjs +256 -0
  82. package/dist/es/service/index.mjs.map +1 -0
  83. package/dist/es/service/utils.mjs +15 -0
  84. package/dist/es/service/utils.mjs.map +1 -0
  85. package/dist/es/skill/index.mjs +38 -0
  86. package/dist/es/skill/index.mjs.map +1 -0
  87. package/dist/es/task-runner.mjs +258 -0
  88. package/dist/es/task-runner.mjs.map +1 -0
  89. package/dist/es/task-timing.mjs +12 -0
  90. package/dist/es/task-timing.mjs.map +1 -0
  91. package/dist/es/tree.mjs +13 -0
  92. package/dist/es/tree.mjs.map +1 -0
  93. package/dist/es/types.mjs +196 -0
  94. package/dist/es/types.mjs.map +1 -0
  95. package/dist/es/utils.mjs +218 -0
  96. package/dist/es/utils.mjs.map +1 -0
  97. package/dist/es/yaml/builder.mjs +13 -0
  98. package/dist/es/yaml/builder.mjs.map +1 -0
  99. package/dist/es/yaml/index.mjs +4 -0
  100. package/dist/es/yaml/player.mjs +418 -0
  101. package/dist/es/yaml/player.mjs.map +1 -0
  102. package/dist/es/yaml/utils.mjs +73 -0
  103. package/dist/es/yaml/utils.mjs.map +1 -0
  104. package/dist/es/yaml.mjs +0 -0
  105. package/dist/lib/agent/agent.js +757 -0
  106. package/dist/lib/agent/agent.js.map +1 -0
  107. package/dist/lib/agent/common.js +5 -0
  108. package/dist/lib/agent/execution-session.js +75 -0
  109. package/dist/lib/agent/execution-session.js.map +1 -0
  110. package/dist/lib/agent/index.js +81 -0
  111. package/dist/lib/agent/index.js.map +1 -0
  112. package/dist/lib/agent/task-builder.js +367 -0
  113. package/dist/lib/agent/task-builder.js.map +1 -0
  114. package/dist/lib/agent/task-cache.js +238 -0
  115. package/dist/lib/agent/task-cache.js.map +1 -0
  116. package/dist/lib/agent/tasks.js +465 -0
  117. package/dist/lib/agent/tasks.js.map +1 -0
  118. package/dist/lib/agent/ui-utils.js +143 -0
  119. package/dist/lib/agent/ui-utils.js.map +1 -0
  120. package/dist/lib/agent/utils.js +275 -0
  121. package/dist/lib/agent/utils.js.map +1 -0
  122. package/dist/lib/ai-model/auto-glm/actions.js +258 -0
  123. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  124. package/dist/lib/ai-model/auto-glm/index.js +66 -0
  125. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  126. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  127. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/auto-glm/planning.js +105 -0
  129. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  131. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/auto-glm/util.js +46 -0
  133. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  134. package/dist/lib/ai-model/conversation-history.js +229 -0
  135. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  136. package/dist/lib/ai-model/index.js +125 -0
  137. package/dist/lib/ai-model/index.js.map +1 -0
  138. package/dist/lib/ai-model/inspect.js +429 -0
  139. package/dist/lib/ai-model/inspect.js.map +1 -0
  140. package/dist/lib/ai-model/llm-planning.js +270 -0
  141. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  142. package/dist/lib/ai-model/prompt/common.js +41 -0
  143. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  144. package/dist/lib/ai-model/prompt/describe.js +100 -0
  145. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  146. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  147. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  148. package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
  149. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  150. package/dist/lib/ai-model/prompt/llm-planning.js +401 -0
  151. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  152. package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
  153. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  154. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  155. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  157. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  158. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  159. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  160. package/dist/lib/ai-model/prompt/util.js +105 -0
  161. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  162. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  163. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  164. package/dist/lib/ai-model/service-caller/index.js +531 -0
  165. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  166. package/dist/lib/ai-model/ui-tars-planning.js +283 -0
  167. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  168. package/dist/lib/common.js +480 -0
  169. package/dist/lib/common.js.map +1 -0
  170. package/dist/lib/device/device-options.js +20 -0
  171. package/dist/lib/device/device-options.js.map +1 -0
  172. package/dist/lib/device/index.js +418 -0
  173. package/dist/lib/device/index.js.map +1 -0
  174. package/dist/lib/dump/html-utils.js +281 -0
  175. package/dist/lib/dump/html-utils.js.map +1 -0
  176. package/dist/lib/dump/image-restoration.js +77 -0
  177. package/dist/lib/dump/image-restoration.js.map +1 -0
  178. package/dist/lib/dump/index.js +60 -0
  179. package/dist/lib/dump/index.js.map +1 -0
  180. package/dist/lib/index.js +146 -0
  181. package/dist/lib/index.js.map +1 -0
  182. package/dist/lib/report-generator.js +172 -0
  183. package/dist/lib/report-generator.js.map +1 -0
  184. package/dist/lib/report.js +145 -0
  185. package/dist/lib/report.js.map +1 -0
  186. package/dist/lib/screenshot-item.js +139 -0
  187. package/dist/lib/screenshot-item.js.map +1 -0
  188. package/dist/lib/service/index.js +290 -0
  189. package/dist/lib/service/index.js.map +1 -0
  190. package/dist/lib/service/utils.js +49 -0
  191. package/dist/lib/service/utils.js.map +1 -0
  192. package/dist/lib/skill/index.js +72 -0
  193. package/dist/lib/skill/index.js.map +1 -0
  194. package/dist/lib/task-runner.js +295 -0
  195. package/dist/lib/task-runner.js.map +1 -0
  196. package/dist/lib/task-timing.js +46 -0
  197. package/dist/lib/task-timing.js.map +1 -0
  198. package/dist/lib/tree.js +53 -0
  199. package/dist/lib/tree.js.map +1 -0
  200. package/dist/lib/types.js +285 -0
  201. package/dist/lib/types.js.map +1 -0
  202. package/dist/lib/utils.js +297 -0
  203. package/dist/lib/utils.js.map +1 -0
  204. package/dist/lib/yaml/builder.js +57 -0
  205. package/dist/lib/yaml/builder.js.map +1 -0
  206. package/dist/lib/yaml/index.js +81 -0
  207. package/dist/lib/yaml/index.js.map +1 -0
  208. package/dist/lib/yaml/player.js +452 -0
  209. package/dist/lib/yaml/player.js.map +1 -0
  210. package/dist/lib/yaml/utils.js +126 -0
  211. package/dist/lib/yaml/utils.js.map +1 -0
  212. package/dist/lib/yaml.js +20 -0
  213. package/dist/lib/yaml.js.map +1 -0
  214. package/dist/types/agent/agent.d.ts +190 -0
  215. package/dist/types/agent/common.d.ts +0 -0
  216. package/dist/types/agent/execution-session.d.ts +36 -0
  217. package/dist/types/agent/index.d.ts +10 -0
  218. package/dist/types/agent/task-builder.d.ts +34 -0
  219. package/dist/types/agent/task-cache.d.ts +48 -0
  220. package/dist/types/agent/tasks.d.ts +70 -0
  221. package/dist/types/agent/ui-utils.d.ts +14 -0
  222. package/dist/types/agent/utils.d.ts +29 -0
  223. package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
  224. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  225. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  226. package/dist/types/ai-model/auto-glm/planning.d.ts +10 -0
  227. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  228. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  229. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  230. package/dist/types/ai-model/index.d.ts +14 -0
  231. package/dist/types/ai-model/inspect.d.ts +58 -0
  232. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  233. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  234. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  235. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  236. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  237. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  238. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  239. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  240. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  241. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  242. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  243. package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
  244. package/dist/types/ai-model/service-caller/index.d.ts +49 -0
  245. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  246. package/dist/types/common.d.ts +288 -0
  247. package/dist/types/device/device-options.d.ts +142 -0
  248. package/dist/types/device/index.d.ts +2315 -0
  249. package/dist/types/dump/html-utils.d.ts +52 -0
  250. package/dist/types/dump/image-restoration.d.ts +6 -0
  251. package/dist/types/dump/index.d.ts +5 -0
  252. package/dist/types/index.d.ts +17 -0
  253. package/dist/types/report-generator.d.ts +48 -0
  254. package/dist/types/report.d.ts +15 -0
  255. package/dist/types/screenshot-item.d.ts +66 -0
  256. package/dist/types/service/index.d.ts +23 -0
  257. package/dist/types/service/utils.d.ts +2 -0
  258. package/dist/types/skill/index.d.ts +25 -0
  259. package/dist/types/task-runner.d.ts +48 -0
  260. package/dist/types/task-timing.d.ts +8 -0
  261. package/dist/types/tree.d.ts +4 -0
  262. package/dist/types/types.d.ts +645 -0
  263. package/dist/types/utils.d.ts +40 -0
  264. package/dist/types/yaml/builder.d.ts +2 -0
  265. package/dist/types/yaml/index.d.ts +4 -0
  266. package/dist/types/yaml/player.d.ts +34 -0
  267. package/dist/types/yaml/utils.d.ts +9 -0
  268. package/dist/types/yaml.d.ts +203 -0
  269. package/package.json +111 -0
@@ -0,0 +1,100 @@
1
+ import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
2
+ import type { IModelConfig } from '@midscene/shared/env';
3
+ export interface EventCounts {
4
+ navigation: number;
5
+ click: number;
6
+ input: number;
7
+ scroll: number;
8
+ total: number;
9
+ }
10
+ export interface InputDescription {
11
+ description: string;
12
+ value: string;
13
+ }
14
+ export interface ProcessedEvent {
15
+ type: string;
16
+ timestamp: number;
17
+ url?: string;
18
+ title?: string;
19
+ elementDescription?: string;
20
+ value?: string;
21
+ pageInfo?: any;
22
+ elementRect?: any;
23
+ }
24
+ export interface EventSummary {
25
+ testName: string;
26
+ startUrl: string;
27
+ eventCounts: EventCounts;
28
+ urls: string[];
29
+ clickDescriptions: string[];
30
+ inputDescriptions: InputDescription[];
31
+ events: ProcessedEvent[];
32
+ }
33
+ export interface ChromeRecordedEvent {
34
+ type: string;
35
+ timestamp: number;
36
+ url?: string;
37
+ title?: string;
38
+ elementDescription?: string;
39
+ value?: string;
40
+ pageInfo?: any;
41
+ elementRect?: any;
42
+ screenshotBefore?: string;
43
+ screenshotAfter?: string;
44
+ screenshotWithBox?: string;
45
+ }
46
+ export interface YamlGenerationOptions {
47
+ testName?: string;
48
+ includeTimestamps?: boolean;
49
+ maxScreenshots?: number;
50
+ description?: string;
51
+ }
52
+ export interface FilteredEvents {
53
+ navigationEvents: ChromeRecordedEvent[];
54
+ clickEvents: ChromeRecordedEvent[];
55
+ inputEvents: ChromeRecordedEvent[];
56
+ scrollEvents: ChromeRecordedEvent[];
57
+ }
58
+ /**
59
+ * Get screenshots from events for LLM context
60
+ */
61
+ export declare const getScreenshotsForLLM: (events: ChromeRecordedEvent[], maxScreenshots?: number) => string[];
62
+ /**
63
+ * Filter events by type for easier processing
64
+ */
65
+ export declare const filterEventsByType: (events: ChromeRecordedEvent[]) => FilteredEvents;
66
+ /**
67
+ * Create event counts summary
68
+ */
69
+ export declare const createEventCounts: (filteredEvents: FilteredEvents, totalEvents: number) => EventCounts;
70
+ /**
71
+ * Extract input descriptions from input events
72
+ */
73
+ export declare const extractInputDescriptions: (inputEvents: ChromeRecordedEvent[]) => InputDescription[];
74
+ /**
75
+ * Process events for LLM consumption
76
+ */
77
+ export declare const processEventsForLLM: (events: ChromeRecordedEvent[]) => ProcessedEvent[];
78
+ /**
79
+ * Prepare comprehensive event summary for LLM
80
+ */
81
+ export declare const prepareEventSummary: (events: ChromeRecordedEvent[], options?: {
82
+ testName?: string;
83
+ maxScreenshots?: number;
84
+ }) => EventSummary;
85
+ /**
86
+ * Create message content for LLM with optional screenshots
87
+ */
88
+ export declare const createMessageContent: (promptText: string, screenshots?: string[], includeScreenshots?: boolean) => any[];
89
+ /**
90
+ * Validate events before processing
91
+ */
92
+ export declare const validateEvents: (events: ChromeRecordedEvent[]) => void;
93
+ /**
94
+ * Generates YAML test configuration from recorded events using AI
95
+ */
96
+ export declare const generateYamlTest: (events: ChromeRecordedEvent[], options: YamlGenerationOptions, modelConfig: IModelConfig) => Promise<string>;
97
+ /**
98
+ * Generates YAML test configuration from recorded events using AI with streaming support
99
+ */
100
+ export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options: YamlGenerationOptions & StreamingCodeGenerationOptions, modelConfig: IModelConfig) => Promise<StreamingAIResponse>;
@@ -0,0 +1,49 @@
1
+ import type { AIUsageInfo, DeepThinkOption } from '../../types';
2
+ import type { StreamingCallback } from '../../types';
3
+ export declare class AIResponseParseError extends Error {
4
+ usage?: AIUsageInfo;
5
+ rawResponse: string;
6
+ constructor(message: string, rawResponse: string, usage?: AIUsageInfo);
7
+ }
8
+ import { type IModelConfig, type TModelFamily } from '@midscene/shared/env';
9
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
10
+ import type { AIArgs } from '../../common';
11
+ export declare function callAI(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: {
12
+ stream?: boolean;
13
+ onChunk?: StreamingCallback;
14
+ deepThink?: DeepThinkOption;
15
+ abortSignal?: AbortSignal;
16
+ }): Promise<{
17
+ content: string;
18
+ reasoning_content?: string;
19
+ usage?: AIUsageInfo;
20
+ isStreamed: boolean;
21
+ }>;
22
+ export declare function callAIWithObjectResponse<T>(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: {
23
+ deepThink?: DeepThinkOption;
24
+ abortSignal?: AbortSignal;
25
+ }): Promise<{
26
+ content: T;
27
+ contentString: string;
28
+ usage?: AIUsageInfo;
29
+ reasoning_content?: string;
30
+ }>;
31
+ export declare function callAIWithStringResponse(msgs: AIArgs, modelConfig: IModelConfig, options?: {
32
+ abortSignal?: AbortSignal;
33
+ }): Promise<{
34
+ content: string;
35
+ usage?: AIUsageInfo;
36
+ }>;
37
+ export declare function extractJSONFromCodeBlock(response: string): string;
38
+ export declare function preprocessDoubaoBboxJson(input: string): string;
39
+ export declare function resolveReasoningConfig({ reasoningEnabled, reasoningEffort, reasoningBudget, modelFamily, }: {
40
+ reasoningEnabled?: boolean;
41
+ reasoningEffort?: string;
42
+ reasoningBudget?: number;
43
+ modelFamily?: TModelFamily;
44
+ }): {
45
+ config: Record<string, unknown>;
46
+ debugMessage?: string;
47
+ warningMessage?: string;
48
+ };
49
+ export declare function safeParseJson(input: string, modelFamily: TModelFamily | undefined): any;
@@ -0,0 +1,72 @@
1
+ import type { PlanningAIResponse, UIContext } from '../types';
2
+ import { type IModelConfig } from '@midscene/shared/env';
3
+ import type { ConversationHistory } from './conversation-history';
4
+ type ActionType = 'click' | 'left_double' | 'right_single' | 'drag' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
5
+ export declare function uiTarsPlanning(userInstruction: string, options: {
6
+ conversationHistory: ConversationHistory;
7
+ context: UIContext;
8
+ modelConfig: IModelConfig;
9
+ actionContext?: string;
10
+ abortSignal?: AbortSignal;
11
+ }): Promise<PlanningAIResponse>;
12
+ interface BaseAction {
13
+ action_type: ActionType;
14
+ action_inputs: Record<string, any>;
15
+ reflection: string | null;
16
+ thought: string | null;
17
+ }
18
+ interface ClickAction extends BaseAction {
19
+ action_type: 'click';
20
+ action_inputs: {
21
+ start_box: string;
22
+ };
23
+ }
24
+ interface DragAction extends BaseAction {
25
+ action_type: 'drag';
26
+ action_inputs: {
27
+ start_box: string;
28
+ end_box: string;
29
+ };
30
+ }
31
+ interface WaitAction extends BaseAction {
32
+ action_type: 'wait';
33
+ action_inputs: {
34
+ time: string;
35
+ };
36
+ }
37
+ interface LeftDoubleAction extends BaseAction {
38
+ action_type: 'left_double';
39
+ action_inputs: {
40
+ start_box: string;
41
+ };
42
+ }
43
+ interface RightSingleAction extends BaseAction {
44
+ action_type: 'right_single';
45
+ action_inputs: {
46
+ start_box: string;
47
+ };
48
+ }
49
+ interface TypeAction extends BaseAction {
50
+ action_type: 'type';
51
+ action_inputs: {
52
+ content: string;
53
+ };
54
+ }
55
+ interface HotkeyAction extends BaseAction {
56
+ action_type: 'hotkey';
57
+ action_inputs: {
58
+ key: string;
59
+ };
60
+ }
61
+ interface ScrollAction extends BaseAction {
62
+ action_type: 'scroll';
63
+ action_inputs: {
64
+ direction: 'up' | 'down';
65
+ };
66
+ }
67
+ interface FinishedAction extends BaseAction {
68
+ action_type: 'finished';
69
+ action_inputs: Record<string, never>;
70
+ }
71
+ export type Action = ClickAction | LeftDoubleAction | RightSingleAction | DragAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction;
72
+ export {};
@@ -0,0 +1,288 @@
1
+ import type { BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from './types';
2
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
3
+ import type { PlanningLocateParam } from './types';
4
+ import type { TModelFamily } from '@midscene/shared/env';
5
+ import { z } from 'zod';
6
+ export type AIArgs = ChatCompletionMessageParam[];
7
+ type AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];
8
+ /**
9
+ * Convert a point coordinate [0, 1000] to a small bbox [0, 1000]
10
+ * Creates a small bbox around the center point in the same coordinate space
11
+ *
12
+ * @param x - X coordinate in [0, 1000] range
13
+ * @param y - Y coordinate in [0, 1000] range
14
+ * @param bboxSize - Size of the bbox to create (default: 20)
15
+ * @returns [x1, y1, x2, y2] bbox in [0, 1000] coordinate space
16
+ */
17
+ export declare function pointToBbox(x: number, y: number, bboxSize?: number): [number, number, number, number];
18
+ export declare function fillBboxParam(locate: PlanningLocateParam, width: number, height: number, modelFamily: TModelFamily | undefined): PlanningLocateParam;
19
+ export declare function adaptQwen2_5Bbox(bbox: number[]): [number, number, number, number];
20
+ export declare function adaptGpt5Bbox(bbox: number[] | string[] | string): [number, number, number, number];
21
+ export declare function adaptDoubaoBbox(bbox: string[] | number[] | string, width: number, height: number): [number, number, number, number];
22
+ export declare function adaptBbox(bbox: AdaptBboxInput, width: number, height: number, modelFamily: TModelFamily | undefined): [number, number, number, number];
23
+ export declare function normalized01000(bbox: number[], width: number, height: number): [number, number, number, number];
24
+ export declare function adaptGeminiBbox(bbox: number[], width: number, height: number): [number, number, number, number];
25
+ export declare function adaptBboxToRect(bbox: number[], width: number, height: number, offsetX?: number, offsetY?: number, rightLimit?: number, bottomLimit?: number, modelFamily?: TModelFamily | undefined, scale?: number): Rect;
26
+ export declare function mergeRects(rects: Rect[]): {
27
+ left: number;
28
+ top: number;
29
+ width: number;
30
+ height: number;
31
+ };
32
+ /**
33
+ * Expand the search area to at least 400 x 400 pixels
34
+ *
35
+ * Step 1: Extend 100px on each side (top, right, bottom, left)
36
+ * - If the element is near a boundary, expansion on that side will be limited
37
+ * - No compensation is made for boundary limitations (this is intentional)
38
+ *
39
+ * Step 2: Ensure the area is at least 400x400 pixels
40
+ * - Scale up proportionally from the center if needed
41
+ * - Final result is clamped to screen boundaries
42
+ */
43
+ export declare function expandSearchArea(rect: Rect, screenSize: Size): Rect;
44
+ export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
45
+ export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[]): MidsceneYamlFlowItem[];
46
+ export declare const PointSchema: z.ZodObject<{
47
+ left: z.ZodNumber;
48
+ top: z.ZodNumber;
49
+ }, "strip", z.ZodTypeAny, {
50
+ left: number;
51
+ top: number;
52
+ }, {
53
+ left: number;
54
+ top: number;
55
+ }>;
56
+ export declare const SizeSchema: z.ZodObject<{
57
+ width: z.ZodNumber;
58
+ height: z.ZodNumber;
59
+ }, "strip", z.ZodTypeAny, {
60
+ width: number;
61
+ height: number;
62
+ }, {
63
+ width: number;
64
+ height: number;
65
+ }>;
66
+ export declare const RectSchema: z.ZodIntersection<z.ZodIntersection<z.ZodObject<{
67
+ left: z.ZodNumber;
68
+ top: z.ZodNumber;
69
+ }, "strip", z.ZodTypeAny, {
70
+ left: number;
71
+ top: number;
72
+ }, {
73
+ left: number;
74
+ top: number;
75
+ }>, z.ZodObject<{
76
+ width: z.ZodNumber;
77
+ height: z.ZodNumber;
78
+ }, "strip", z.ZodTypeAny, {
79
+ width: number;
80
+ height: number;
81
+ }, {
82
+ width: number;
83
+ height: number;
84
+ }>>, z.ZodObject<{
85
+ zoom: z.ZodOptional<z.ZodNumber>;
86
+ }, "strip", z.ZodTypeAny, {
87
+ zoom?: number | undefined;
88
+ }, {
89
+ zoom?: number | undefined;
90
+ }>>;
91
+ export declare const TMultimodalPromptSchema: z.ZodObject<{
92
+ images: z.ZodOptional<z.ZodArray<z.ZodObject<{
93
+ name: z.ZodString;
94
+ url: z.ZodString;
95
+ }, "strip", z.ZodTypeAny, {
96
+ name: string;
97
+ url: string;
98
+ }, {
99
+ name: string;
100
+ url: string;
101
+ }>, "many">>;
102
+ convertHttpImage2Base64: z.ZodOptional<z.ZodBoolean>;
103
+ }, "strip", z.ZodTypeAny, {
104
+ images?: {
105
+ name: string;
106
+ url: string;
107
+ }[] | undefined;
108
+ convertHttpImage2Base64?: boolean | undefined;
109
+ }, {
110
+ images?: {
111
+ name: string;
112
+ url: string;
113
+ }[] | undefined;
114
+ convertHttpImage2Base64?: boolean | undefined;
115
+ }>;
116
+ export declare const TUserPromptSchema: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
117
+ prompt: z.ZodString;
118
+ }, "strip", z.ZodTypeAny, {
119
+ prompt: string;
120
+ }, {
121
+ prompt: string;
122
+ }>, z.ZodObject<{
123
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
124
+ name: z.ZodString;
125
+ url: z.ZodString;
126
+ }, "strip", z.ZodTypeAny, {
127
+ name: string;
128
+ url: string;
129
+ }, {
130
+ name: string;
131
+ url: string;
132
+ }>, "many">>>;
133
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
134
+ }, "strip", z.ZodTypeAny, {
135
+ images?: {
136
+ name: string;
137
+ url: string;
138
+ }[] | undefined;
139
+ convertHttpImage2Base64?: boolean | undefined;
140
+ }, {
141
+ images?: {
142
+ name: string;
143
+ url: string;
144
+ }[] | undefined;
145
+ convertHttpImage2Base64?: boolean | undefined;
146
+ }>>]>;
147
+ export type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;
148
+ export type TUserPrompt = z.infer<typeof TUserPromptSchema>;
149
+ /**
150
+ * Returns the schema for locator fields.
151
+ * This now returns the input schema which is more permissive and suitable for validation.
152
+ */
153
+ export declare const getMidsceneLocationSchema: () => z.ZodObject<{
154
+ prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
155
+ prompt: z.ZodString;
156
+ }, "strip", z.ZodTypeAny, {
157
+ prompt: string;
158
+ }, {
159
+ prompt: string;
160
+ }>, z.ZodObject<{
161
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
162
+ name: z.ZodString;
163
+ url: z.ZodString;
164
+ }, "strip", z.ZodTypeAny, {
165
+ name: string;
166
+ url: string;
167
+ }, {
168
+ name: string;
169
+ url: string;
170
+ }>, "many">>>;
171
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
172
+ }, "strip", z.ZodTypeAny, {
173
+ images?: {
174
+ name: string;
175
+ url: string;
176
+ }[] | undefined;
177
+ convertHttpImage2Base64?: boolean | undefined;
178
+ }, {
179
+ images?: {
180
+ name: string;
181
+ url: string;
182
+ }[] | undefined;
183
+ convertHttpImage2Base64?: boolean | undefined;
184
+ }>>]>;
185
+ deepLocate: z.ZodOptional<z.ZodBoolean>;
186
+ deepThink: z.ZodOptional<z.ZodBoolean>;
187
+ cacheable: z.ZodOptional<z.ZodBoolean>;
188
+ xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
189
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
190
+ prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
191
+ prompt: z.ZodString;
192
+ }, "strip", z.ZodTypeAny, {
193
+ prompt: string;
194
+ }, {
195
+ prompt: string;
196
+ }>, z.ZodObject<{
197
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
198
+ name: z.ZodString;
199
+ url: z.ZodString;
200
+ }, "strip", z.ZodTypeAny, {
201
+ name: string;
202
+ url: string;
203
+ }, {
204
+ name: string;
205
+ url: string;
206
+ }>, "many">>>;
207
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
208
+ }, "strip", z.ZodTypeAny, {
209
+ images?: {
210
+ name: string;
211
+ url: string;
212
+ }[] | undefined;
213
+ convertHttpImage2Base64?: boolean | undefined;
214
+ }, {
215
+ images?: {
216
+ name: string;
217
+ url: string;
218
+ }[] | undefined;
219
+ convertHttpImage2Base64?: boolean | undefined;
220
+ }>>]>;
221
+ deepLocate: z.ZodOptional<z.ZodBoolean>;
222
+ deepThink: z.ZodOptional<z.ZodBoolean>;
223
+ cacheable: z.ZodOptional<z.ZodBoolean>;
224
+ xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
225
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
226
+ prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
227
+ prompt: z.ZodString;
228
+ }, "strip", z.ZodTypeAny, {
229
+ prompt: string;
230
+ }, {
231
+ prompt: string;
232
+ }>, z.ZodObject<{
233
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
234
+ name: z.ZodString;
235
+ url: z.ZodString;
236
+ }, "strip", z.ZodTypeAny, {
237
+ name: string;
238
+ url: string;
239
+ }, {
240
+ name: string;
241
+ url: string;
242
+ }>, "many">>>;
243
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
244
+ }, "strip", z.ZodTypeAny, {
245
+ images?: {
246
+ name: string;
247
+ url: string;
248
+ }[] | undefined;
249
+ convertHttpImage2Base64?: boolean | undefined;
250
+ }, {
251
+ images?: {
252
+ name: string;
253
+ url: string;
254
+ }[] | undefined;
255
+ convertHttpImage2Base64?: boolean | undefined;
256
+ }>>]>;
257
+ deepLocate: z.ZodOptional<z.ZodBoolean>;
258
+ deepThink: z.ZodOptional<z.ZodBoolean>;
259
+ cacheable: z.ZodOptional<z.ZodBoolean>;
260
+ xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
261
+ }, z.ZodTypeAny, "passthrough">>;
262
+ export declare const ifMidsceneLocatorField: (field: any) => boolean;
263
+ export declare const dumpMidsceneLocatorField: (field: any) => string;
264
+ export declare const findAllMidsceneLocatorField: (zodType?: z.ZodType<any>, requiredOnly?: boolean) => string[];
265
+ export declare const dumpActionParam: (jsonObject: Record<string, any>, zodSchema: z.ZodType<any>) => Record<string, any>;
266
+ /**
267
+ * Parse and validate action parameters using Zod schema.
268
+ * All fields are validated through Zod, EXCEPT locator fields which are skipped.
269
+ * Default values defined in the schema are automatically applied.
270
+ *
271
+ * Locator fields are special business logic fields with complex validation requirements,
272
+ * so they are intentionally excluded from Zod parsing and use existing validation logic.
273
+ *
274
+ * When shrunkShotToLogicalRatio is provided and !== 1, coordinates in locate fields
275
+ * are transformed from screenshot space to logical space.
276
+ */
277
+ export declare const parseActionParam: (rawParam: Record<string, any> | undefined, zodSchema?: z.ZodType<any>, options?: {
278
+ shrunkShotToLogicalRatio?: number;
279
+ }) => Record<string, any> | undefined;
280
+ export declare const finalizeActionName = "Finalize";
281
+ /**
282
+ * Get a readable time string for a given timestamp or the current time
283
+ * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'
284
+ * @param timestamp - Optional timestamp in milliseconds. If not provided, uses current system time.
285
+ * @returns A formatted time string with format label
286
+ */
287
+ export declare const getReadableTimeString: (format?: string, timestamp?: number) => string;
288
+ export {};
@@ -0,0 +1,142 @@
1
+ import type { DeviceAction } from '../types';
2
+ /**
3
+ * Android device input options
4
+ */
5
+ export type AndroidDeviceInputOpt = {
6
+ /** Automatically dismiss the keyboard after input is completed */
7
+ autoDismissKeyboard?: boolean;
8
+ /** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
9
+ keyboardDismissStrategy?: 'esc-first' | 'back-first';
10
+ };
11
+ /**
12
+ * Android device options
13
+ */
14
+ export type AndroidDeviceOpt = {
15
+ /** Path to the ADB executable */
16
+ androidAdbPath?: string;
17
+ /** Remote ADB host address */
18
+ remoteAdbHost?: string;
19
+ /** Remote ADB port */
20
+ remoteAdbPort?: number;
21
+ /** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
22
+ imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
23
+ /** Display ID to use for this device */
24
+ displayId?: number;
25
+ /** Use physical display ID for screenshot operations */
26
+ usePhysicalDisplayIdForScreenshot?: boolean;
27
+ /** Use physical display ID when looking up display information */
28
+ usePhysicalDisplayIdForDisplayLookup?: boolean;
29
+ /** Custom device actions to register */
30
+ customActions?: DeviceAction<any>[];
31
+ /**
32
+ * @deprecated Use `screenshotShrinkFactor` in AgentOpt instead.
33
+ * This option no longer affects screenshot size sent to AI model.
34
+ */
35
+ screenshotResizeScale?: number;
36
+ /** Always fetch screen info on each call; if false, cache the first result */
37
+ alwaysRefreshScreenInfo?: boolean;
38
+ /** Minimum screenshot buffer size in bytes (default: 10240 = 10KB). Set to 0 to disable validation. */
39
+ minScreenshotBufferSize?: number;
40
+ /**
41
+ * Scrcpy screenshot configuration for high-performance screen capture.
42
+ *
43
+ * Scrcpy provides 6-8x faster screenshots by streaming H.264 video from the device.
44
+ * When enabled, scrcpy will:
45
+ * 1. Start a video stream from the device on first screenshot request
46
+ * 2. Keep the connection alive for subsequent screenshots (16-50ms each)
47
+ * 3. Automatically disconnect after idle timeout to save resources
48
+ * 4. Fall back to standard ADB mode if unavailable
49
+ *
50
+ * @example
51
+ * ```typescript
52
+ * // Enable scrcpy for high-performance screenshots
53
+ * const device = new AndroidDevice(deviceId, {
54
+ * scrcpyConfig: {
55
+ * enabled: true,
56
+ * },
57
+ * });
58
+ *
59
+ * // Custom configuration
60
+ * const device = new AndroidDevice(deviceId, {
61
+ * scrcpyConfig: {
62
+ * enabled: true,
63
+ * maxSize: 0, // 0 = no scaling
64
+ * idleTimeoutMs: 30000,
65
+ * videoBitRate: 8_000_000,
66
+ * },
67
+ * });
68
+ * ```
69
+ */
70
+ scrcpyConfig?: {
71
+ /**
72
+ * Enable scrcpy for high-performance screenshots.
73
+ * @default false
74
+ */
75
+ enabled?: boolean;
76
+ /**
77
+ * Maximum video dimension (width or height).
78
+ * Video stream will be scaled down if device resolution exceeds this value.
79
+ * Lower values reduce bandwidth but may affect image quality.
80
+ *
81
+ * @default 0 (no scaling, use original resolution)
82
+ * @example
83
+ * { maxSize: 1024 } // Scale down to 1024 when the device resolution exceeds it
84
+ */
85
+ maxSize?: number;
86
+ /**
87
+ * Idle timeout in milliseconds before disconnecting scrcpy.
88
+ * Connection auto-closes after this period of inactivity to save resources.
89
+ * Set to 0 to disable auto-disconnect.
90
+ * @default 30000 (30 seconds)
91
+ */
92
+ idleTimeoutMs?: number;
93
+ /**
94
+ * Video bit rate for H.264 encoding in bits per second.
95
+ * Higher values improve quality but increase bandwidth usage.
96
+ * @default 2000000 (2 Mbps)
97
+ */
98
+ videoBitRate?: number;
99
+ };
100
+ } & AndroidDeviceInputOpt;
101
+ /**
102
+ * iOS device input options
103
+ */
104
+ export type IOSDeviceInputOpt = {
105
+ /** Automatically dismiss the keyboard after input is completed */
106
+ autoDismissKeyboard?: boolean;
107
+ };
108
+ /**
109
+ * iOS device options
110
+ */
111
+ export type IOSDeviceOpt = {
112
+ /** Device ID (UDID) to connect to */
113
+ deviceId?: string;
114
+ /** Custom device actions to register */
115
+ customActions?: DeviceAction<any>[];
116
+ /** WebDriverAgent port (default: 8100) */
117
+ wdaPort?: number;
118
+ /** WebDriverAgent host (default: 'localhost') */
119
+ wdaHost?: string;
120
+ /** Whether to use WebDriverAgent */
121
+ useWDA?: boolean;
122
+ /** WDA MJPEG server port for real-time screen streaming (default: 9100) */
123
+ wdaMjpegPort?: number;
124
+ } & IOSDeviceInputOpt;
125
+ /**
126
+ * HarmonyOS device input options
127
+ */
128
+ export type HarmonyDeviceInputOpt = {
129
+ /** Automatically dismiss the keyboard after input is completed */
130
+ autoDismissKeyboard?: boolean;
131
+ };
132
+ /**
133
+ * HarmonyOS device options
134
+ */
135
+ export type HarmonyDeviceOpt = {
136
+ /** Path to the HDC executable */
137
+ hdcPath?: string;
138
+ /** Custom device actions to register */
139
+ customActions?: DeviceAction<any>[];
140
+ /** Screenshot resize scale factor */
141
+ screenshotResizeScale?: number;
142
+ } & HarmonyDeviceInputOpt;