@donggui/core 1.5.4-donggui.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (269) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +709 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/execution-session.mjs +41 -0
  7. package/dist/es/agent/execution-session.mjs.map +1 -0
  8. package/dist/es/agent/index.mjs +6 -0
  9. package/dist/es/agent/task-builder.mjs +330 -0
  10. package/dist/es/agent/task-builder.mjs.map +1 -0
  11. package/dist/es/agent/task-cache.mjs +186 -0
  12. package/dist/es/agent/task-cache.mjs.map +1 -0
  13. package/dist/es/agent/tasks.mjs +422 -0
  14. package/dist/es/agent/tasks.mjs.map +1 -0
  15. package/dist/es/agent/ui-utils.mjs +91 -0
  16. package/dist/es/agent/ui-utils.mjs.map +1 -0
  17. package/dist/es/agent/utils.mjs +198 -0
  18. package/dist/es/agent/utils.mjs.map +1 -0
  19. package/dist/es/ai-model/auto-glm/actions.mjs +224 -0
  20. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  21. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  22. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  23. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  24. package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
  25. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  26. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  27. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  28. package/dist/es/ai-model/auto-glm/util.mjs +9 -0
  29. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  30. package/dist/es/ai-model/conversation-history.mjs +195 -0
  31. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  32. package/dist/es/ai-model/index.mjs +11 -0
  33. package/dist/es/ai-model/inspect.mjs +386 -0
  34. package/dist/es/ai-model/inspect.mjs.map +1 -0
  35. package/dist/es/ai-model/llm-planning.mjs +233 -0
  36. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  37. package/dist/es/ai-model/prompt/common.mjs +7 -0
  38. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  39. package/dist/es/ai-model/prompt/describe.mjs +66 -0
  40. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  41. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  42. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  43. package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
  44. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  45. package/dist/es/ai-model/prompt/llm-planning.mjs +364 -0
  46. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  47. package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
  48. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  49. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  50. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  51. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  52. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  53. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  54. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  55. package/dist/es/ai-model/prompt/util.mjs +59 -0
  56. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  57. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  58. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  59. package/dist/es/ai-model/service-caller/index.mjs +466 -0
  60. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  61. package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
  62. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  63. package/dist/es/common.mjs +371 -0
  64. package/dist/es/common.mjs.map +1 -0
  65. package/dist/es/device/device-options.mjs +0 -0
  66. package/dist/es/device/index.mjs +300 -0
  67. package/dist/es/device/index.mjs.map +1 -0
  68. package/dist/es/dump/html-utils.mjs +211 -0
  69. package/dist/es/dump/html-utils.mjs.map +1 -0
  70. package/dist/es/dump/image-restoration.mjs +43 -0
  71. package/dist/es/dump/image-restoration.mjs.map +1 -0
  72. package/dist/es/dump/index.mjs +3 -0
  73. package/dist/es/index.mjs +15 -0
  74. package/dist/es/index.mjs.map +1 -0
  75. package/dist/es/report-generator.mjs +134 -0
  76. package/dist/es/report-generator.mjs.map +1 -0
  77. package/dist/es/report.mjs +111 -0
  78. package/dist/es/report.mjs.map +1 -0
  79. package/dist/es/screenshot-item.mjs +105 -0
  80. package/dist/es/screenshot-item.mjs.map +1 -0
  81. package/dist/es/service/index.mjs +256 -0
  82. package/dist/es/service/index.mjs.map +1 -0
  83. package/dist/es/service/utils.mjs +15 -0
  84. package/dist/es/service/utils.mjs.map +1 -0
  85. package/dist/es/skill/index.mjs +38 -0
  86. package/dist/es/skill/index.mjs.map +1 -0
  87. package/dist/es/task-runner.mjs +258 -0
  88. package/dist/es/task-runner.mjs.map +1 -0
  89. package/dist/es/task-timing.mjs +12 -0
  90. package/dist/es/task-timing.mjs.map +1 -0
  91. package/dist/es/tree.mjs +13 -0
  92. package/dist/es/tree.mjs.map +1 -0
  93. package/dist/es/types.mjs +196 -0
  94. package/dist/es/types.mjs.map +1 -0
  95. package/dist/es/utils.mjs +218 -0
  96. package/dist/es/utils.mjs.map +1 -0
  97. package/dist/es/yaml/builder.mjs +13 -0
  98. package/dist/es/yaml/builder.mjs.map +1 -0
  99. package/dist/es/yaml/index.mjs +4 -0
  100. package/dist/es/yaml/player.mjs +418 -0
  101. package/dist/es/yaml/player.mjs.map +1 -0
  102. package/dist/es/yaml/utils.mjs +73 -0
  103. package/dist/es/yaml/utils.mjs.map +1 -0
  104. package/dist/es/yaml.mjs +0 -0
  105. package/dist/lib/agent/agent.js +757 -0
  106. package/dist/lib/agent/agent.js.map +1 -0
  107. package/dist/lib/agent/common.js +5 -0
  108. package/dist/lib/agent/execution-session.js +75 -0
  109. package/dist/lib/agent/execution-session.js.map +1 -0
  110. package/dist/lib/agent/index.js +81 -0
  111. package/dist/lib/agent/index.js.map +1 -0
  112. package/dist/lib/agent/task-builder.js +367 -0
  113. package/dist/lib/agent/task-builder.js.map +1 -0
  114. package/dist/lib/agent/task-cache.js +238 -0
  115. package/dist/lib/agent/task-cache.js.map +1 -0
  116. package/dist/lib/agent/tasks.js +465 -0
  117. package/dist/lib/agent/tasks.js.map +1 -0
  118. package/dist/lib/agent/ui-utils.js +143 -0
  119. package/dist/lib/agent/ui-utils.js.map +1 -0
  120. package/dist/lib/agent/utils.js +275 -0
  121. package/dist/lib/agent/utils.js.map +1 -0
  122. package/dist/lib/ai-model/auto-glm/actions.js +258 -0
  123. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  124. package/dist/lib/ai-model/auto-glm/index.js +66 -0
  125. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  126. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  127. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  128. package/dist/lib/ai-model/auto-glm/planning.js +105 -0
  129. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  130. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  131. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  132. package/dist/lib/ai-model/auto-glm/util.js +46 -0
  133. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  134. package/dist/lib/ai-model/conversation-history.js +229 -0
  135. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  136. package/dist/lib/ai-model/index.js +125 -0
  137. package/dist/lib/ai-model/index.js.map +1 -0
  138. package/dist/lib/ai-model/inspect.js +429 -0
  139. package/dist/lib/ai-model/inspect.js.map +1 -0
  140. package/dist/lib/ai-model/llm-planning.js +270 -0
  141. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  142. package/dist/lib/ai-model/prompt/common.js +41 -0
  143. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  144. package/dist/lib/ai-model/prompt/describe.js +100 -0
  145. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  146. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  147. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  148. package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
  149. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  150. package/dist/lib/ai-model/prompt/llm-planning.js +401 -0
  151. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  152. package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
  153. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  154. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  155. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  156. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  157. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  158. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  159. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  160. package/dist/lib/ai-model/prompt/util.js +105 -0
  161. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  162. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  163. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  164. package/dist/lib/ai-model/service-caller/index.js +531 -0
  165. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  166. package/dist/lib/ai-model/ui-tars-planning.js +283 -0
  167. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  168. package/dist/lib/common.js +480 -0
  169. package/dist/lib/common.js.map +1 -0
  170. package/dist/lib/device/device-options.js +20 -0
  171. package/dist/lib/device/device-options.js.map +1 -0
  172. package/dist/lib/device/index.js +418 -0
  173. package/dist/lib/device/index.js.map +1 -0
  174. package/dist/lib/dump/html-utils.js +281 -0
  175. package/dist/lib/dump/html-utils.js.map +1 -0
  176. package/dist/lib/dump/image-restoration.js +77 -0
  177. package/dist/lib/dump/image-restoration.js.map +1 -0
  178. package/dist/lib/dump/index.js +60 -0
  179. package/dist/lib/dump/index.js.map +1 -0
  180. package/dist/lib/index.js +146 -0
  181. package/dist/lib/index.js.map +1 -0
  182. package/dist/lib/report-generator.js +172 -0
  183. package/dist/lib/report-generator.js.map +1 -0
  184. package/dist/lib/report.js +145 -0
  185. package/dist/lib/report.js.map +1 -0
  186. package/dist/lib/screenshot-item.js +139 -0
  187. package/dist/lib/screenshot-item.js.map +1 -0
  188. package/dist/lib/service/index.js +290 -0
  189. package/dist/lib/service/index.js.map +1 -0
  190. package/dist/lib/service/utils.js +49 -0
  191. package/dist/lib/service/utils.js.map +1 -0
  192. package/dist/lib/skill/index.js +72 -0
  193. package/dist/lib/skill/index.js.map +1 -0
  194. package/dist/lib/task-runner.js +295 -0
  195. package/dist/lib/task-runner.js.map +1 -0
  196. package/dist/lib/task-timing.js +46 -0
  197. package/dist/lib/task-timing.js.map +1 -0
  198. package/dist/lib/tree.js +53 -0
  199. package/dist/lib/tree.js.map +1 -0
  200. package/dist/lib/types.js +285 -0
  201. package/dist/lib/types.js.map +1 -0
  202. package/dist/lib/utils.js +297 -0
  203. package/dist/lib/utils.js.map +1 -0
  204. package/dist/lib/yaml/builder.js +57 -0
  205. package/dist/lib/yaml/builder.js.map +1 -0
  206. package/dist/lib/yaml/index.js +81 -0
  207. package/dist/lib/yaml/index.js.map +1 -0
  208. package/dist/lib/yaml/player.js +452 -0
  209. package/dist/lib/yaml/player.js.map +1 -0
  210. package/dist/lib/yaml/utils.js +126 -0
  211. package/dist/lib/yaml/utils.js.map +1 -0
  212. package/dist/lib/yaml.js +20 -0
  213. package/dist/lib/yaml.js.map +1 -0
  214. package/dist/types/agent/agent.d.ts +190 -0
  215. package/dist/types/agent/common.d.ts +0 -0
  216. package/dist/types/agent/execution-session.d.ts +36 -0
  217. package/dist/types/agent/index.d.ts +10 -0
  218. package/dist/types/agent/task-builder.d.ts +34 -0
  219. package/dist/types/agent/task-cache.d.ts +48 -0
  220. package/dist/types/agent/tasks.d.ts +70 -0
  221. package/dist/types/agent/ui-utils.d.ts +14 -0
  222. package/dist/types/agent/utils.d.ts +29 -0
  223. package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
  224. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  225. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  226. package/dist/types/ai-model/auto-glm/planning.d.ts +10 -0
  227. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  228. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  229. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  230. package/dist/types/ai-model/index.d.ts +14 -0
  231. package/dist/types/ai-model/inspect.d.ts +58 -0
  232. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  233. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  234. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  235. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  236. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  237. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  238. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  239. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  240. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  241. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  242. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  243. package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
  244. package/dist/types/ai-model/service-caller/index.d.ts +49 -0
  245. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  246. package/dist/types/common.d.ts +288 -0
  247. package/dist/types/device/device-options.d.ts +142 -0
  248. package/dist/types/device/index.d.ts +2315 -0
  249. package/dist/types/dump/html-utils.d.ts +52 -0
  250. package/dist/types/dump/image-restoration.d.ts +6 -0
  251. package/dist/types/dump/index.d.ts +5 -0
  252. package/dist/types/index.d.ts +17 -0
  253. package/dist/types/report-generator.d.ts +48 -0
  254. package/dist/types/report.d.ts +15 -0
  255. package/dist/types/screenshot-item.d.ts +66 -0
  256. package/dist/types/service/index.d.ts +23 -0
  257. package/dist/types/service/utils.d.ts +2 -0
  258. package/dist/types/skill/index.d.ts +25 -0
  259. package/dist/types/task-runner.d.ts +48 -0
  260. package/dist/types/task-timing.d.ts +8 -0
  261. package/dist/types/tree.d.ts +4 -0
  262. package/dist/types/types.d.ts +645 -0
  263. package/dist/types/utils.d.ts +40 -0
  264. package/dist/types/yaml/builder.d.ts +2 -0
  265. package/dist/types/yaml/index.d.ts +4 -0
  266. package/dist/types/yaml/player.d.ts +34 -0
  267. package/dist/types/yaml/utils.d.ts +9 -0
  268. package/dist/types/yaml.d.ts +203 -0
  269. package/package.json +111 -0
@@ -0,0 +1,70 @@
1
+ import { type TMultimodalPrompt, type TUserPrompt } from '../common';
2
+ import type { AbstractInterface } from '../device';
3
+ import type Service from '../service';
4
+ import type { TaskRunner } from '../task-runner';
5
+ import { TaskExecutionError } from '../task-runner';
6
+ import type { DeepThinkOption, DeviceAction, ExecutionTaskApply, ExecutionTaskProgressOptions, MidsceneYamlFlowItem, PlanningAction, PlanningActionParamWaitFor, ServiceExtractOption, ServiceExtractParam } from '../types';
7
+ import { type IModelConfig } from '@midscene/shared/env';
8
+ import type { TaskCache } from './task-cache';
9
+ export { locatePlanForLocate } from './task-builder';
10
+ interface ExecutionResult<OutputType = any> {
11
+ output: OutputType;
12
+ thought?: string;
13
+ runner: TaskRunner;
14
+ }
15
+ interface TaskExecutorHooks {
16
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
17
+ }
18
+ export { TaskExecutionError };
19
+ export declare class TaskExecutor {
20
+ interface: AbstractInterface;
21
+ service: Service;
22
+ taskCache?: TaskCache;
23
+ private readonly providedActionSpace;
24
+ private readonly taskBuilder;
25
+ private conversationHistory;
26
+ onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
27
+ private readonly hooks?;
28
+ replanningCycleLimit?: number;
29
+ waitAfterAction?: number;
30
+ useDeviceTimestamp?: boolean;
31
+ get page(): AbstractInterface;
32
+ constructor(interfaceInstance: AbstractInterface, service: Service, opts: {
33
+ taskCache?: TaskCache;
34
+ onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
35
+ replanningCycleLimit?: number;
36
+ waitAfterAction?: number;
37
+ useDeviceTimestamp?: boolean;
38
+ hooks?: TaskExecutorHooks;
39
+ actionSpace: DeviceAction[];
40
+ });
41
+ private createExecutionSession;
42
+ private getActionSpace;
43
+ /**
44
+ * Get a readable time string using device time when configured.
45
+ * This method respects the useDeviceTimestamp configuration.
46
+ * @param format - Optional format string
47
+ * @returns A formatted time string
48
+ */
49
+ private getTimeString;
50
+ convertPlanToExecutable(plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, options?: {
51
+ cacheable?: boolean;
52
+ deepLocate?: boolean;
53
+ abortSignal?: AbortSignal;
54
+ }): Promise<{
55
+ tasks: ExecutionTaskApply[];
56
+ }>;
57
+ loadYamlFlowAsPlanning(userInstruction: string, yamlString: string): Promise<{
58
+ runner: TaskRunner;
59
+ }>;
60
+ runPlans(title: string, plans: PlanningAction[], modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig): Promise<ExecutionResult>;
61
+ action(userPrompt: string, modelConfigForPlanning: IModelConfig, modelConfigForDefaultIntent: IModelConfig, includeBboxInPlanning: boolean, aiActContext?: string, cacheable?: boolean, replanningCycleLimitOverride?: number, imagesIncludeCount?: number, deepThink?: DeepThinkOption, fileChooserAccept?: string[], deepLocate?: boolean, abortSignal?: AbortSignal): Promise<ExecutionResult<{
62
+ yamlFlow?: MidsceneYamlFlowItem[];
63
+ output?: string;
64
+ } | undefined>>;
65
+ private runAction;
66
+ private createTypeQueryTask;
67
+ createTypeQueryExecution<T>(type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert', demand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<ExecutionResult<T>>;
68
+ waitFor(assertion: TUserPrompt, opt: PlanningActionParamWaitFor, modelConfig: IModelConfig): Promise<ExecutionResult<void>>;
69
+ }
70
+ export declare function withFileChooser<T>(interfaceInstance: AbstractInterface, fileChooserAccept: string[] | undefined, action: () => Promise<T>): Promise<T>;
@@ -0,0 +1,14 @@
1
+ import type { DetailedLocateParam, ExecutionTask, PullParam, ScrollParam } from '../types';
2
+ export declare function typeStr(task: ExecutionTask): any;
3
+ export declare function locateParamStr(locate?: DetailedLocateParam | string): string;
4
+ export declare function scrollParamStr(scrollParam?: ScrollParam): string;
5
+ export declare function pullParamStr(pullParam?: PullParam): string;
6
+ export declare function extractInsightParam(taskParam: any): {
7
+ content: string;
8
+ images?: Array<{
9
+ name: string;
10
+ url: string;
11
+ }>;
12
+ };
13
+ export declare function taskTitleStr(type: 'Tap' | 'Hover' | 'Input' | 'RightClick' | 'KeyboardPress' | 'Scroll' | 'Act' | 'Query' | 'Assert' | 'WaitFor' | 'Locate' | 'Boolean' | 'Number' | 'String', prompt: string): string;
14
+ export declare function paramStr(task: ExecutionTask): string;
@@ -0,0 +1,29 @@
1
+ import type { TMultimodalPrompt, TUserPrompt } from '../common';
2
+ import type { AbstractInterface } from '../device';
3
+ import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, Rect, UIContext } from '../types';
4
+ import type { TaskCache } from './task-cache';
5
+ export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
6
+ uploadServerUrl?: string;
7
+ screenshotShrinkFactor?: number;
8
+ }): Promise<UIContext>;
9
+ export declare function getReportFileName(tag?: string): string;
10
+ export declare function printReportMsg(filepath: string): void;
11
+ /**
12
+ * Get the current execution file name
13
+ * @returns The name of the current execution file
14
+ */
15
+ export declare function getCurrentExecutionFile(trace?: string): string | false;
16
+ export declare function generateCacheId(fileName?: string): string;
17
+ export declare function ifPlanLocateParamIsBbox(planLocateParam: PlanningLocateParam): boolean;
18
+ export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam): LocateResultElement | undefined;
19
+ export declare function matchElementFromCache(context: {
20
+ taskCache?: TaskCache;
21
+ interfaceInstance: AbstractInterface;
22
+ }, cacheEntry: ElementCacheFeature | undefined, cachePrompt: TUserPrompt, cacheable: boolean | undefined): Promise<LocateResultElement | undefined>;
23
+ export declare const getMidsceneVersion: () => string;
24
+ export declare const parsePrompt: (prompt: TUserPrompt) => {
25
+ textPrompt: string;
26
+ multimodalPrompt?: TMultimodalPrompt;
27
+ };
28
+ export declare const transformLogicalElementToScreenshot: (element: LocateResultElement, shrunkShotToLogicalRatio: number) => LocateResultElement;
29
+ export declare const transformLogicalRectToScreenshotRect: (rect: Rect, shrunkShotToLogicalRatio: number) => Rect;
@@ -0,0 +1,77 @@
1
+ import type { PlanningAction } from '../../types';
2
+ export interface BaseAction {
3
+ _metadata: string;
4
+ think?: string;
5
+ }
6
+ export interface TapAction extends BaseAction {
7
+ _metadata: 'do';
8
+ action: 'Tap';
9
+ element: [number, number];
10
+ }
11
+ export interface DoubleTapAction extends BaseAction {
12
+ _metadata: 'do';
13
+ action: 'Double Tap';
14
+ element: [number, number];
15
+ }
16
+ export interface TypeAction extends BaseAction {
17
+ _metadata: 'do';
18
+ action: 'Type';
19
+ text: string;
20
+ }
21
+ export interface SwipeAction extends BaseAction {
22
+ _metadata: 'do';
23
+ action: 'Swipe';
24
+ start: [number, number];
25
+ end: [number, number];
26
+ }
27
+ export interface LongPressAction extends BaseAction {
28
+ _metadata: 'do';
29
+ action: 'Long Press';
30
+ element: [number, number];
31
+ }
32
+ export interface LaunchAction extends BaseAction {
33
+ _metadata: 'do';
34
+ action: 'Launch';
35
+ app: string;
36
+ }
37
+ export interface BackAction extends BaseAction {
38
+ _metadata: 'do';
39
+ action: 'Back';
40
+ }
41
+ export interface HomeAction extends BaseAction {
42
+ _metadata: 'do';
43
+ action: 'Home';
44
+ }
45
+ export interface WaitAction extends BaseAction {
46
+ _metadata: 'do';
47
+ action: 'Wait';
48
+ durationMs: number;
49
+ }
50
+ export interface InteractAction extends BaseAction {
51
+ _metadata: 'do';
52
+ action: 'Interact';
53
+ }
54
+ export interface CallAPIAction extends BaseAction {
55
+ _metadata: 'do';
56
+ action: 'Call_API';
57
+ instruction: string;
58
+ }
59
+ export interface TakeoverAction extends BaseAction {
60
+ _metadata: 'do';
61
+ action: 'Take_over';
62
+ message: string;
63
+ }
64
+ export interface NoteAction extends BaseAction {
65
+ _metadata: 'do';
66
+ action: 'Note';
67
+ message: string;
68
+ }
69
+ export interface FinishAction extends BaseAction {
70
+ _metadata: 'finish';
71
+ message: string;
72
+ }
73
+ export type ParsedAction = TapAction | DoubleTapAction | TypeAction | SwipeAction | LongPressAction | LaunchAction | BackAction | HomeAction | WaitAction | InteractAction | CallAPIAction | TakeoverAction | NoteAction | FinishAction;
74
+ export declare function transformAutoGLMAction(action: ParsedAction, size: {
75
+ width: number;
76
+ height: number;
77
+ }): PlanningAction[];
@@ -0,0 +1,6 @@
1
+ export { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from './prompt';
2
+ export { parseAutoGLMLocateResponse, parseAutoGLMResponse, parseAction, } from './parser';
3
+ export { autoGLMPlanning } from './planning';
4
+ export { transformAutoGLMAction } from './actions';
5
+ export { isAutoGLM, isUITars } from './util';
6
+ export type { ParsedAction } from './actions';
@@ -0,0 +1,18 @@
1
+ import type { ParsedAction } from './actions';
2
+ export declare const extractValueAfter: (src: string, key: string) => string;
3
+ export declare function parseAction(response: {
4
+ think: string;
5
+ content: string;
6
+ }): ParsedAction;
7
+ export declare function parseAutoGLMResponse(content: string): {
8
+ think: string;
9
+ content: string;
10
+ };
11
+ export declare function parseAutoGLMLocateResponse(rawResponse: string): {
12
+ think: string;
13
+ coordinates: {
14
+ x: number;
15
+ y: number;
16
+ } | null;
17
+ error?: string;
18
+ };
@@ -0,0 +1,10 @@
1
+ import type { PlanningAIResponse, UIContext } from '../../types';
2
+ import type { IModelConfig } from '@midscene/shared/env';
3
+ import type { ConversationHistory } from '../conversation-history';
4
+ export declare function autoGLMPlanning(userInstruction: string, options: {
5
+ conversationHistory: ConversationHistory;
6
+ context: UIContext;
7
+ modelConfig: IModelConfig;
8
+ actionContext?: string;
9
+ abortSignal?: AbortSignal;
10
+ }): Promise<PlanningAIResponse>;
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Auto-GLM Prompt Templates
3
+ *
4
+ * Portions of this file are derived from Open-AutoGLM
5
+ * Copyright (c) 2024 zai-org
6
+ * Licensed under the Apache License, Version 2.0
7
+ *
8
+ * Source: https://github.com/zai-org/Open-AutoGLM
9
+ *
10
+ * Licensed under the Apache License, Version 2.0 (the "License");
11
+ * you may not use this file except in compliance with the License.
12
+ * You may obtain a copy of the License at
13
+ *
14
+ * http://www.apache.org/licenses/LICENSE-2.0
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ *
22
+ * Modifications:
23
+ * - Adapted prompts for Midscene.js integration
24
+ */
25
+ import type { TModelFamily } from '@midscene/shared/env';
26
+ export declare const getAutoGLMPlanPrompt: (modelFamily: TModelFamily | undefined) => string;
27
+ export declare const getAutoGLMLocatePrompt: (modelFamily: TModelFamily | undefined) => string;
@@ -0,0 +1,13 @@
1
+ import type { TModelFamily } from '@midscene/shared/env';
2
+ /**
3
+ * Check if the modelFamily is auto-glm or auto-glm-multilingual
4
+ * @param modelFamily The model family to check
5
+ * @returns true if modelFamily is auto-glm or auto-glm-multilingual
6
+ */
7
+ export declare function isAutoGLM(modelFamily: TModelFamily | undefined): boolean;
8
+ /**
9
+ * Check if the modelFamily is a UI-TARS variant
10
+ * @param modelFamily The model family to check
11
+ * @returns true if modelFamily is any UI-TARS variant
12
+ */
13
+ export declare function isUITars(modelFamily: TModelFamily | undefined): boolean;
@@ -0,0 +1,105 @@
1
+ import type { SubGoal } from '../types';
2
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
3
+ export interface ConversationHistoryOptions {
4
+ initialMessages?: ChatCompletionMessageParam[];
5
+ }
6
+ export declare class ConversationHistory {
7
+ private readonly messages;
8
+ private subGoals;
9
+ private memories;
10
+ private historicalLogs;
11
+ pendingFeedbackMessage: string;
12
+ constructor(options?: ConversationHistoryOptions);
13
+ resetPendingFeedbackMessageIfExists(): void;
14
+ append(message: ChatCompletionMessageParam): void;
15
+ seed(messages: ChatCompletionMessageParam[]): void;
16
+ reset(): void;
17
+ /**
18
+ * Snapshot the conversation history, and replace the images with text if the number of images exceeds the limit.
19
+ * @param maxImages - The maximum number of images to include in the snapshot. Undefined means no limit.
20
+ * @returns The snapshot of the conversation history.
21
+ */
22
+ snapshot(maxImages?: number): ChatCompletionMessageParam[];
23
+ get length(): number;
24
+ [Symbol.iterator](): IterableIterator<ChatCompletionMessageParam>;
25
+ toJSON(): ChatCompletionMessageParam[];
26
+ /**
27
+ * Set all sub-goals, replacing any existing ones.
28
+ * Automatically marks the first pending goal as running.
29
+ */
30
+ setSubGoals(subGoals: SubGoal[]): void;
31
+ /**
32
+ * Merge sub-goals from update-plan-content.
33
+ * Preserves existing descriptions when incoming description is empty.
34
+ *
35
+ * This handles compact XML updates like:
36
+ * <sub-goal index="1" status="finished" />
37
+ */
38
+ mergeSubGoals(subGoals: SubGoal[]): void;
39
+ /**
40
+ * Update a single sub-goal by index.
41
+ * Clears logs if status or description actually changes.
42
+ * @returns true if the sub-goal was found and updated, false otherwise
43
+ */
44
+ updateSubGoal(index: number, updates: Partial<Omit<SubGoal, 'index'>>): boolean;
45
+ /**
46
+ * Mark the first pending sub-goal as running.
47
+ * Clears logs since status changes.
48
+ */
49
+ markFirstPendingAsRunning(): void;
50
+ /**
51
+ * Mark a sub-goal as finished.
52
+ * Automatically marks the next pending goal as running.
53
+ * @returns true if the sub-goal was found and updated, false otherwise
54
+ */
55
+ markSubGoalFinished(index: number): boolean;
56
+ /**
57
+ * Mark all sub-goals as finished.
58
+ * Clears logs for any goal whose status actually changes.
59
+ */
60
+ markAllSubGoalsFinished(): void;
61
+ /**
62
+ * Append a log entry to the currently running sub-goal.
63
+ * The log describes an action performed while working on the sub-goal.
64
+ */
65
+ appendSubGoalLog(log: string): void;
66
+ /**
67
+ * Convert sub-goals to text representation.
68
+ * Includes actions performed (logs) for the current sub-goal.
69
+ */
70
+ subGoalsToText(): string;
71
+ /**
72
+ * Append a log entry to the historical logs list.
73
+ * Used in non-deepThink mode to track executed steps across planning rounds.
74
+ */
75
+ appendHistoricalLog(log: string): void;
76
+ /**
77
+ * Convert historical logs to text representation.
78
+ * Provides context about previously executed steps to the model.
79
+ */
80
+ historicalLogsToText(): string;
81
+ /**
82
+ * Append a memory to the memories list
83
+ */
84
+ appendMemory(memory: string): void;
85
+ /**
86
+ * Get all memories
87
+ */
88
+ getMemories(): string[];
89
+ /**
90
+ * Convert memories to text representation
91
+ */
92
+ memoriesToText(): string;
93
+ /**
94
+ * Clear all memories
95
+ */
96
+ clearMemories(): void;
97
+ /**
98
+ * Compress the conversation history if it exceeds the threshold.
99
+ * Removes the oldest messages and replaces them with a single placeholder message.
100
+ * @param threshold - The number of messages that triggers compression.
101
+ * @param keepCount - The number of recent messages to keep after compression.
102
+ * @returns true if compression was performed, false otherwise.
103
+ */
104
+ compressHistory(threshold: number, keepCount: number): boolean;
105
+ }
@@ -0,0 +1,14 @@
1
+ export { AIResponseParseError, callAIWithStringResponse, callAIWithObjectResponse, callAI, } from './service-caller/index';
2
+ export { systemPromptToLocateElement } from './prompt/llm-locator';
3
+ export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
4
+ export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
5
+ export type { ChatCompletionMessageParam } from 'openai/resources/index';
6
+ export { AiLocateElement, AiExtractElementInfo, AiLocateSection, AiJudgeOrderSensitive, } from './inspect';
7
+ export { plan } from './llm-planning';
8
+ export { autoGLMPlanning } from './auto-glm/planning';
9
+ export { adaptBboxToRect } from '../common';
10
+ export { uiTarsPlanning } from './ui-tars-planning';
11
+ export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';
12
+ export type { SubGoal, SubGoalStatus } from '../types';
13
+ export type { AIArgs } from '../common';
14
+ export { getMidsceneLocationSchema, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, findAllMidsceneLocatorField, dumpActionParam, parseActionParam, } from '../common';
@@ -0,0 +1,58 @@
1
+ import type { AIDataExtractionResponse, AIUsageInfo, Rect, ServiceExtractOption, UIContext } from '../types';
2
+ import type { IModelConfig } from '@midscene/shared/env';
3
+ import type { LocateResultElement } from '@midscene/shared/types';
4
+ import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
5
+ import type { TMultimodalPrompt, TUserPrompt } from '../common';
6
+ import { callAIWithObjectResponse } from './service-caller/index';
7
/** Tuple of chat messages: exactly one system message param first, followed by any number of user message params. */
+ export type AIArgs = [
8
+ ChatCompletionSystemMessageParam,
9
+ ...ChatCompletionUserMessageParam[]
10
+ ];
11
/**
 * Declaration for an async element-locating call (implementation not shown here).
 *
 * Required options: `context`, `targetElementDescription`, `modelConfig`.
 * Optional options: `searchConfig`, typed as the resolved result of `AiLocateSection`,
 * and `abortSignal`.
 *
 * Resolves to an object carrying `parseResult` (`elements` plus optional `errors`),
 * the `rawResponse` string, and optional `rect`, `usage` and `reasoning_content`.
 *
 * NOTE(review): presumably `searchConfig` narrows the search to a previously located
 * section and `abortSignal` cancels the model request — confirm against the implementation.
 */
+ export declare function AiLocateElement(options: {
12
+ context: UIContext;
13
+ targetElementDescription: TUserPrompt;
14
+ searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
15
+ modelConfig: IModelConfig;
16
+ abortSignal?: AbortSignal;
17
+ }): Promise<{
18
+ parseResult: {
19
+ elements: LocateResultElement[];
20
+ errors?: string[];
21
+ };
22
+ rect?: Rect;
23
+ rawResponse: string;
24
+ usage?: AIUsageInfo;
25
+ reasoning_content?: string;
26
+ }>;
27
/**
 * Declaration for an async section-locating call (implementation not shown here).
 *
 * Required options: `context`, `sectionDescription`, `modelConfig`; `abortSignal` is optional.
 *
 * Resolves to an object in which only `rawResponse` is guaranteed; `rect`, `imageBase64`,
 * `scale`, `error` and `usage` are all optional, so callers must check for their presence.
 *
 * NOTE(review): looks like failures are reported through the `error` field rather than a
 * rejected promise — confirm against the implementation.
 */
+ export declare function AiLocateSection(options: {
28
+ context: UIContext;
29
+ sectionDescription: TUserPrompt;
30
+ modelConfig: IModelConfig;
31
+ abortSignal?: AbortSignal;
32
+ }): Promise<{
33
+ rect?: Rect;
34
+ imageBase64?: string;
35
+ scale?: number;
36
+ error?: string;
37
+ rawResponse: string;
38
+ usage?: AIUsageInfo;
39
+ }>;
40
/**
 * Declaration for an async data-extraction call (implementation not shown here).
 *
 * `dataQuery` is either a single string or a string-to-string record. `context` and
 * `modelConfig` are required; `multimodalPrompt`, `pageDescription` and `extractOption`
 * are optional.
 *
 * Resolves to `parseResult` typed as `AIDataExtractionResponse<T>` plus the `rawResponse`
 * string; `usage` and `reasoning_content` are always present as keys but may be `undefined`.
 *
 * NOTE(review): `T` appears only in the return type, so it is chosen by the caller and is
 * not checked against `dataQuery` by this signature.
 */
+ export declare function AiExtractElementInfo<T>(options: {
41
+ dataQuery: string | Record<string, string>;
42
+ multimodalPrompt?: TMultimodalPrompt;
43
+ context: UIContext;
44
+ pageDescription?: string;
45
+ extractOption?: ServiceExtractOption;
46
+ modelConfig: IModelConfig;
47
+ }): Promise<{
48
+ parseResult: AIDataExtractionResponse<T>;
49
+ rawResponse: string;
50
+ usage: AIUsageInfo | undefined;
51
+ reasoning_content: string | undefined;
52
+ }>;
53
/**
 * Declaration for an async order-sensitivity judgement (implementation not shown here).
 *
 * @param description - The description string to judge.
 * @param callAIFn - A function with the type of `callAIWithObjectResponse` instantiated
 *   for a `{ isOrderSensitive: boolean }` payload.
 * @param modelConfig - Model configuration.
 * @returns A promise of `{ isOrderSensitive }` with an optional `usage` entry.
 *
 * NOTE(review): presumably `callAIFn` is passed in so the caller controls how the model
 * is invoked — confirm against the implementation.
 */
+ export declare function AiJudgeOrderSensitive(description: string, callAIFn: typeof callAIWithObjectResponse<{
54
+ isOrderSensitive: boolean;
55
+ }>, modelConfig: IModelConfig): Promise<{
56
+ isOrderSensitive: boolean;
57
+ usage?: AIUsageInfo;
58
+ }>;
@@ -0,0 +1,19 @@
1
+ import type { DeepThinkOption, DeviceAction, InterfaceType, PlanningAIResponse, RawResponsePlanningAIResponse, UIContext } from '../types';
2
+ import type { IModelConfig, TModelFamily } from '@midscene/shared/env';
3
+ import type { ConversationHistory } from './conversation-history';
4
+ /**
5
+ * Parse an XML response from the LLM and convert it to a RawResponsePlanningAIResponse. `modelFamily` may be `undefined`; how it influences parsing is not visible in this declaration.
6
+ */
7
+ export declare function parseXMLPlanningResponse(xmlString: string, modelFamily: TModelFamily | undefined): RawResponsePlanningAIResponse;
8
/**
 * Declaration for the async planning entry point (implementation not shown here).
 *
 * @param userInstruction - The instruction string to plan for.
 * @param opts - `context`, `interfaceType`, `actionSpace`, `modelConfig`,
 *   `conversationHistory` and `includeBbox` are required; `actionContext`,
 *   `imagesIncludeCount`, `deepThink` and `abortSignal` are optional.
 * @returns A promise of a `PlanningAIResponse`.
 *
 * NOTE(review): presumably `imagesIncludeCount` limits how many images are sent to the
 * model and `abortSignal` cancels the request — confirm against the implementation.
 */
+ export declare function plan(userInstruction: string, opts: {
9
+ context: UIContext;
10
+ interfaceType: InterfaceType;
11
+ actionSpace: DeviceAction<any>[];
12
+ actionContext?: string;
13
+ modelConfig: IModelConfig;
14
+ conversationHistory: ConversationHistory;
15
+ includeBbox: boolean;
16
+ imagesIncludeCount?: number;
17
+ deepThink?: DeepThinkOption;
18
+ abortSignal?: AbortSignal;
19
+ }): Promise<PlanningAIResponse>;
@@ -0,0 +1,2 @@
1
+ import type { TModelFamily } from '@midscene/shared/env';
2
/**
 * Returns one of exactly two literal descriptions of the bounding-box format:
 * a `[ymin, xmin, ymax, xmax]` form normalized to 0-1000, or a `[xmin, ymin, xmax, ymax]` form.
 * Which model families map to which string is not visible in this declaration; `modelFamily` may be `undefined`.
 */
+ export declare function bboxDescription(modelFamily: TModelFamily | undefined): "box_2d bounding box for the target element, should be [ymin, xmin, ymax, xmax] normalized to 0-1000." | "2d bounding box as [xmin, ymin, xmax, ymax]";
@@ -0,0 +1 @@
1
/** Zero-argument function returning a string. NOTE(review): presumably the instruction prompt used when asking the model to describe an element — confirm against the implementation. */
+ export declare const elementDescriberInstruction: () => string;
@@ -0,0 +1,7 @@
1
+ import type { AIDataExtractionResponse } from '../../types';
2
+ /**
3
+ * Parse an XML response from the LLM and convert it to an AIDataExtractionResponse. `T` appears only in the return type, so it is supplied by the caller rather than inferred from `xmlString`.
4
+ */
5
+ export declare function parseXMLExtractionResponse<T>(xmlString: string): AIDataExtractionResponse<T>;
6
/** Takes no arguments and returns a string. NOTE(review): presumably the system prompt for data extraction — confirm against the implementation. */
+ export declare function systemPromptToExtract(): string;
7
/** Returns a string built from a page description and a data query, where the query is either a string or a string-to-string record. NOTE(review): presumably the user-side prompt for extraction — confirm. */
+ export declare const extractDataQueryPrompt: (pageDescription: string, dataQuery: string | Record<string, string>) => string;
@@ -0,0 +1,3 @@
1
+ import type { TModelFamily } from '@midscene/shared/env';
2
/** Returns a string for the given model family, which may be `undefined`. NOTE(review): presumably the system prompt for element location — confirm against the implementation. */
+ export declare function systemPromptToLocateElement(modelFamily: TModelFamily | undefined): string;
3
/** Returns a string derived from a target element description. NOTE(review): presumably the user-side prompt for element location — confirm. */
+ export declare const findElementPrompt: (targetElementDescription: string) => string;
@@ -0,0 +1,10 @@
1
+ import type { DeviceAction } from '../../types';
2
+ import type { TModelFamily } from '@midscene/shared/env';
3
/** Returns a string for a single `DeviceAction`, given a locator-schema type description; `includeBbox` is optional and its default is not visible in this declaration. */
+ export declare const descriptionForAction: (action: DeviceAction<any>, locatorSchemaTypeDescription: string, includeBbox?: boolean) => string;
4
/**
 * Async declaration that resolves to a string (implementation not shown here).
 *
 * Takes a single destructured options object: `actionSpace`, `modelFamily` and `includeBbox`
 * are required keys (`modelFamily` may hold `undefined` but must be supplied);
 * `includeThought` and `includeSubGoals` are optional, with defaults not visible here.
 *
 * NOTE(review): presumably the resolved string is the system prompt for task planning — confirm.
 */
+ export declare function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbox, includeThought, includeSubGoals, }: {
5
+ actionSpace: DeviceAction<any>[];
6
+ modelFamily: TModelFamily | undefined;
7
+ includeBbox: boolean;
8
+ includeThought?: boolean;
9
+ includeSubGoals?: boolean;
10
+ }): Promise<string>;
@@ -0,0 +1,3 @@
1
+ import type { TModelFamily } from '@midscene/shared/env';
2
/** Returns a string for the given model family, which may be `undefined`. NOTE(review): presumably the system prompt for section location — confirm against the implementation. */
+ export declare function systemPromptToLocateSection(modelFamily: TModelFamily | undefined): string;
3
/** Returns a string derived from a section description. NOTE(review): presumably the user-side instruction for section location — confirm. */
+ export declare const sectionLocatorInstruction: (sectionDescription: string) => string;
@@ -0,0 +1,2 @@
1
/** Takes no arguments and returns a string. NOTE(review): presumably the system prompt for the order-sensitivity judgement — confirm against the implementation. */
+ export declare function systemPromptToJudgeOrderSensitive(): string;
2
/** Returns a string derived from a description. NOTE(review): presumably the user-side prompt for the order-sensitivity judgement — confirm. */
+ export declare const orderSensitiveJudgePrompt: (description: string) => string;
@@ -0,0 +1,26 @@
1
+ import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
2
+ import type { IModelConfig } from '@midscene/shared/env';
3
+ import { type ChromeRecordedEvent, type EventCounts, type EventSummary, type InputDescription, type ProcessedEvent, createEventCounts, createMessageContent, extractInputDescriptions, filterEventsByType, getScreenshotsForLLM, prepareEventSummary, processEventsForLLM, validateEvents } from './yaml-generator';
4
/**
 * Options object accepted by the Playwright test generators declared in this file.
 * Every field is optional; default values are not visible in this declaration.
 * `viewportSize`, when given, must carry both `width` and `height`.
 * NOTE(review): `waitForNetworkIdleTimeout` is presumably in milliseconds — confirm.
 */
+ export interface PlaywrightGenerationOptions {
5
+ testName?: string;
6
+ includeScreenshots?: boolean;
7
+ includeTimestamps?: boolean;
8
+ maxScreenshots?: number;
9
+ description?: string;
10
+ viewportSize?: {
11
+ width: number;
12
+ height: number;
13
+ };
14
+ waitForNetworkIdle?: boolean;
15
+ waitForNetworkIdleTimeout?: number;
16
+ }
17
+ export type { ChromeRecordedEvent, EventCounts, InputDescription, ProcessedEvent, EventSummary, };
18
+ export { getScreenshotsForLLM, filterEventsByType, createEventCounts, extractInputDescriptions, processEventsForLLM, prepareEventSummary, createMessageContent, validateEvents, };
19
+ /**
20
+ * Generates Playwright test code from recorded events. Takes the recorded events, a PlaywrightGenerationOptions object and a model config (all required) and resolves to a string.
21
+ */
22
+ export declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions, modelConfig: IModelConfig) => Promise<string>;
23
+ /**
24
+ * Generates Playwright test code from recorded events with streaming support. `options` combines PlaywrightGenerationOptions with StreamingCodeGenerationOptions; resolves to a StreamingAIResponse rather than a plain string.
25
+ */
26
+ export declare const generatePlaywrightTestStream: (events: ChromeRecordedEvent[], options: PlaywrightGenerationOptions & StreamingCodeGenerationOptions, modelConfig: IModelConfig) => Promise<StreamingAIResponse>;
@@ -0,0 +1,2 @@
1
/** Takes no arguments and returns a string. NOTE(review): presumably the planning prompt for the UI-TARS model — confirm against the implementation. */
+ export declare function getUiTarsPlanningPrompt(): string;
2
/** Maps a prediction string to a string. NOTE(review): presumably extracts or condenses a summary from the model's prediction text — confirm against the implementation. */
+ export declare const getSummary: (prediction: string) => string;
@@ -0,0 +1,33 @@
1
+ import type { SubGoal } from '../../types';
2
+ /**
3
+ * Extract content from an XML tag in a string, searching from the end.
4
+ * This approach handles cases where models prepend thinking content (like <think>...</think>)
5
+ * before the actual response tags, or when there are incomplete/nested tags.
6
+ *
7
+ * Strategy: Find the LAST closing tag, then search backwards for the nearest opening tag.
8
+ * This ensures we get the last complete tag pair, even if there are incomplete tags before it.
9
+ *
10
+ * @param xmlString - The XML string to parse
11
+ * @param tagName - The name of the tag to extract (case-insensitive)
12
+ * @returns The trimmed content of the tag, or undefined if not found
13
+ */
14
+ export declare function extractXMLTag(xmlString: string, tagName: string): string | undefined;
15
+ /**
16
+ * Parse sub-goals from XML content
17
+ * Handles both formats:
18
+ * - <sub-goal index="1" status="pending">description</sub-goal>
19
+ * - <sub-goal index="1" status="finished" />
20
+ */
21
+ export declare function parseSubGoalsFromXML(xmlContent: string): SubGoal[];
22
+ /**
23
+ * Extract indexes of sub-goals marked as finished from <mark-sub-goal-done> content
24
+ */
25
+ export declare function parseMarkFinishedIndexes(xmlContent: string): number[];
26
/** Constant declared with the literal value 16. NOTE(review): presumably a threshold compared against the result of `distance`, in pixels — confirm units and usage against the implementation. */
+ export declare const distanceThreshold = 16;
27
/**
 * Returns a number computed from two `{ x, y }` points.
 * NOTE(review): presumably the Euclidean distance between the points; any rounding is
 * not visible in this declaration — confirm against the implementation.
 */
+ export declare function distance(point1: {
28
+ x: number;
29
+ y: number;
30
+ }, point2: {
31
+ x: number;
32
+ y: number;
33
+ }): number;