@midscene/core 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/dist/es/agent/agent.mjs +233 -144
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/execution-session.mjs +41 -0
  4. package/dist/es/agent/execution-session.mjs.map +1 -0
  5. package/dist/es/agent/index.mjs +3 -3
  6. package/dist/es/agent/task-builder.mjs +319 -0
  7. package/dist/es/agent/task-builder.mjs.map +1 -0
  8. package/dist/es/agent/task-cache.mjs +4 -4
  9. package/dist/es/agent/task-cache.mjs.map +1 -1
  10. package/dist/es/agent/tasks.mjs +197 -504
  11. package/dist/es/agent/tasks.mjs.map +1 -1
  12. package/dist/es/agent/ui-utils.mjs +54 -35
  13. package/dist/es/agent/ui-utils.mjs.map +1 -1
  14. package/dist/es/agent/utils.mjs +16 -58
  15. package/dist/es/agent/utils.mjs.map +1 -1
  16. package/dist/es/ai-model/conversation-history.mjs +25 -13
  17. package/dist/es/ai-model/conversation-history.mjs.map +1 -1
  18. package/dist/es/ai-model/index.mjs +4 -4
  19. package/dist/es/ai-model/inspect.mjs +45 -54
  20. package/dist/es/ai-model/inspect.mjs.map +1 -1
  21. package/dist/es/ai-model/llm-planning.mjs +47 -65
  22. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  23. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
  24. package/dist/es/ai-model/prompt/common.mjs.map +1 -1
  25. package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
  26. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  27. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
  28. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  29. package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
  30. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  31. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
  32. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  33. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  34. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  35. package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
  36. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  37. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
  38. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
  39. package/dist/es/ai-model/prompt/util.mjs +3 -88
  40. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  41. package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
  42. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  43. package/dist/es/ai-model/service-caller/index.mjs +182 -274
  44. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  45. package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
  46. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  47. package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
  48. package/dist/es/common.mjs.map +1 -0
  49. package/dist/es/device/device-options.mjs +0 -0
  50. package/dist/es/device/index.mjs +29 -12
  51. package/dist/es/device/index.mjs.map +1 -1
  52. package/dist/es/index.mjs +5 -4
  53. package/dist/es/index.mjs.map +1 -1
  54. package/dist/es/report.mjs.map +1 -1
  55. package/dist/es/{insight → service}/index.mjs +38 -51
  56. package/dist/es/service/index.mjs.map +1 -0
  57. package/dist/es/{insight → service}/utils.mjs +3 -3
  58. package/dist/es/service/utils.mjs.map +1 -0
  59. package/dist/es/task-runner.mjs +264 -0
  60. package/dist/es/task-runner.mjs.map +1 -0
  61. package/dist/es/tree.mjs +13 -2
  62. package/dist/es/tree.mjs.map +1 -0
  63. package/dist/es/types.mjs +18 -1
  64. package/dist/es/types.mjs.map +1 -1
  65. package/dist/es/utils.mjs +6 -7
  66. package/dist/es/utils.mjs.map +1 -1
  67. package/dist/es/yaml/builder.mjs.map +1 -1
  68. package/dist/es/yaml/player.mjs +121 -98
  69. package/dist/es/yaml/player.mjs.map +1 -1
  70. package/dist/es/yaml/utils.mjs +1 -1
  71. package/dist/es/yaml/utils.mjs.map +1 -1
  72. package/dist/lib/agent/agent.js +231 -142
  73. package/dist/lib/agent/agent.js.map +1 -1
  74. package/dist/lib/agent/common.js +1 -1
  75. package/dist/lib/agent/execution-session.js +75 -0
  76. package/dist/lib/agent/execution-session.js.map +1 -0
  77. package/dist/lib/agent/index.js +14 -14
  78. package/dist/lib/agent/index.js.map +1 -1
  79. package/dist/lib/agent/task-builder.js +356 -0
  80. package/dist/lib/agent/task-builder.js.map +1 -0
  81. package/dist/lib/agent/task-cache.js +8 -8
  82. package/dist/lib/agent/task-cache.js.map +1 -1
  83. package/dist/lib/agent/tasks.js +202 -506
  84. package/dist/lib/agent/tasks.js.map +1 -1
  85. package/dist/lib/agent/ui-utils.js +58 -36
  86. package/dist/lib/agent/ui-utils.js.map +1 -1
  87. package/dist/lib/agent/utils.js +26 -68
  88. package/dist/lib/agent/utils.js.map +1 -1
  89. package/dist/lib/ai-model/conversation-history.js +27 -15
  90. package/dist/lib/ai-model/conversation-history.js.map +1 -1
  91. package/dist/lib/ai-model/index.js +27 -27
  92. package/dist/lib/ai-model/index.js.map +1 -1
  93. package/dist/lib/ai-model/inspect.js +51 -57
  94. package/dist/lib/ai-model/inspect.js.map +1 -1
  95. package/dist/lib/ai-model/llm-planning.js +49 -67
  96. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  97. package/dist/lib/ai-model/prompt/assertion.js +2 -2
  98. package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
  99. package/dist/lib/ai-model/prompt/common.js +2 -2
  100. package/dist/lib/ai-model/prompt/common.js.map +1 -1
  101. package/dist/lib/ai-model/prompt/describe.js +2 -2
  102. package/dist/lib/ai-model/prompt/describe.js.map +1 -1
  103. package/dist/lib/ai-model/prompt/extraction.js +2 -2
  104. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  105. package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
  106. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  107. package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
  108. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  109. package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
  110. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  111. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  112. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  113. package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
  114. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  115. package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
  116. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
  117. package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
  118. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
  119. package/dist/lib/ai-model/prompt/util.js +7 -95
  120. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  121. package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
  122. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  123. package/dist/lib/ai-model/service-caller/index.js +288 -401
  124. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  125. package/dist/lib/ai-model/ui-tars-planning.js +71 -10
  126. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  127. package/dist/lib/{ai-model/common.js → common.js} +40 -55
  128. package/dist/lib/common.js.map +1 -0
  129. package/dist/lib/device/device-options.js +20 -0
  130. package/dist/lib/device/device-options.js.map +1 -0
  131. package/dist/lib/device/index.js +63 -40
  132. package/dist/lib/device/index.js.map +1 -1
  133. package/dist/lib/image/index.js +5 -5
  134. package/dist/lib/image/index.js.map +1 -1
  135. package/dist/lib/index.js +24 -20
  136. package/dist/lib/index.js.map +1 -1
  137. package/dist/lib/report.js +2 -2
  138. package/dist/lib/report.js.map +1 -1
  139. package/dist/lib/{insight → service}/index.js +41 -54
  140. package/dist/lib/service/index.js.map +1 -0
  141. package/dist/lib/{insight → service}/utils.js +7 -7
  142. package/dist/lib/service/utils.js.map +1 -0
  143. package/dist/lib/task-runner.js +301 -0
  144. package/dist/lib/task-runner.js.map +1 -0
  145. package/dist/lib/tree.js +13 -4
  146. package/dist/lib/tree.js.map +1 -1
  147. package/dist/lib/types.js +31 -12
  148. package/dist/lib/types.js.map +1 -1
  149. package/dist/lib/utils.js +16 -17
  150. package/dist/lib/utils.js.map +1 -1
  151. package/dist/lib/yaml/builder.js +2 -2
  152. package/dist/lib/yaml/builder.js.map +1 -1
  153. package/dist/lib/yaml/index.js +16 -22
  154. package/dist/lib/yaml/index.js.map +1 -1
  155. package/dist/lib/yaml/player.js +123 -100
  156. package/dist/lib/yaml/player.js.map +1 -1
  157. package/dist/lib/yaml/utils.js +6 -6
  158. package/dist/lib/yaml/utils.js.map +1 -1
  159. package/dist/lib/yaml.js +1 -1
  160. package/dist/lib/yaml.js.map +1 -1
  161. package/dist/types/agent/agent.d.ts +62 -17
  162. package/dist/types/agent/execution-session.d.ts +36 -0
  163. package/dist/types/agent/index.d.ts +3 -2
  164. package/dist/types/agent/task-builder.d.ts +35 -0
  165. package/dist/types/agent/tasks.d.ts +32 -23
  166. package/dist/types/agent/ui-utils.d.ts +9 -2
  167. package/dist/types/agent/utils.d.ts +9 -35
  168. package/dist/types/ai-model/conversation-history.d.ts +8 -4
  169. package/dist/types/ai-model/index.d.ts +5 -5
  170. package/dist/types/ai-model/inspect.d.ts +20 -12
  171. package/dist/types/ai-model/llm-planning.d.ts +3 -1
  172. package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
  173. package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
  174. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
  175. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  176. package/dist/types/ai-model/prompt/util.d.ts +2 -34
  177. package/dist/types/ai-model/service-caller/index.d.ts +2 -3
  178. package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
  179. package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
  180. package/dist/types/device/device-options.d.ts +57 -0
  181. package/dist/types/device/index.d.ts +55 -39
  182. package/dist/types/index.d.ts +7 -6
  183. package/dist/types/service/index.d.ts +26 -0
  184. package/dist/types/service/utils.d.ts +2 -0
  185. package/dist/types/task-runner.d.ts +49 -0
  186. package/dist/types/tree.d.ts +4 -1
  187. package/dist/types/types.d.ts +103 -66
  188. package/dist/types/yaml/utils.d.ts +1 -1
  189. package/dist/types/yaml.d.ts +68 -43
  190. package/package.json +9 -12
  191. package/dist/es/ai-model/action-executor.mjs +0 -129
  192. package/dist/es/ai-model/action-executor.mjs.map +0 -1
  193. package/dist/es/ai-model/common.mjs.map +0 -1
  194. package/dist/es/insight/index.mjs.map +0 -1
  195. package/dist/es/insight/utils.mjs.map +0 -1
  196. package/dist/lib/ai-model/action-executor.js +0 -163
  197. package/dist/lib/ai-model/action-executor.js.map +0 -1
  198. package/dist/lib/ai-model/common.js.map +0 -1
  199. package/dist/lib/insight/index.js.map +0 -1
  200. package/dist/lib/insight/utils.js.map +0 -1
  201. package/dist/types/ai-model/action-executor.d.ts +0 -19
  202. package/dist/types/insight/index.d.ts +0 -31
  203. package/dist/types/insight/utils.d.ts +0 -2
@@ -1,4 +1,5 @@
1
- import type { DeviceAction, LocateResultElement } from '../types';
1
+ import type { ActionScrollParam, DeviceAction, LocateResultElement } from '../types';
2
+ import type { IModelConfig } from '@midscene/shared/env';
2
3
  import type { ElementNode } from '@midscene/shared/extractor';
3
4
  import { z } from 'zod';
4
5
  import type { ElementCacheFeature, Rect, Size, UIContext } from '../types';
@@ -6,9 +7,10 @@ export declare abstract class AbstractInterface {
6
7
  abstract interfaceType: string;
7
8
  abstract screenshotBase64(): Promise<string>;
8
9
  abstract size(): Promise<Size>;
9
- abstract actionSpace(): DeviceAction[] | Promise<DeviceAction[]>;
10
- abstract cacheFeatureForRect?(rect: Rect, opt?: {
11
- _orderSensitive: boolean;
10
+ abstract actionSpace(): DeviceAction[];
11
+ abstract cacheFeatureForRect?(rect: Rect, options?: {
12
+ targetDescription?: string;
13
+ modelConfig?: IModelConfig;
12
14
  }): Promise<ElementCacheFeature>;
13
15
  abstract rectMatchesCacheFeature?(feature: ElementCacheFeature): Promise<Rect>;
14
16
  abstract destroy?(): Promise<void>;
@@ -20,13 +22,13 @@ export declare abstract class AbstractInterface {
20
22
  abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;
21
23
  abstract getContext?(): Promise<UIContext>;
22
24
  }
23
- export declare const defineAction: <TSchema extends z.ZodType, TRuntime = z.infer<TSchema>>(config: {
25
+ export declare const defineAction: <TSchema extends z.ZodType | undefined = undefined, TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined, TReturn = any>(config: {
24
26
  name: string;
25
27
  description: string;
26
28
  interfaceAlias?: string;
27
- paramSchema: TSchema;
28
- call: (param: TRuntime) => Promise<void>;
29
- } & Partial<Omit<DeviceAction<TRuntime>, "name" | "description" | "interfaceAlias" | "paramSchema" | "call">>) => DeviceAction<TRuntime>;
29
+ paramSchema?: TSchema;
30
+ call: (param: TRuntime) => Promise<TReturn> | TReturn;
31
+ } & Partial<Omit<DeviceAction<TRuntime, TReturn>, "name" | "description" | "interfaceAlias" | "paramSchema" | "call">>) => DeviceAction<TRuntime, TReturn>;
30
32
  export declare const actionTapParamSchema: z.ZodObject<{
31
33
  locate: z.ZodObject<{
32
34
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
@@ -145,8 +147,8 @@ export declare const actionTapParamSchema: z.ZodObject<{
145
147
  }[] | undefined;
146
148
  convertHttpImage2Base64?: boolean | undefined;
147
149
  });
148
- cacheable?: boolean | undefined;
149
150
  deepThink?: boolean | undefined;
151
+ cacheable?: boolean | undefined;
150
152
  xpath?: string | boolean | undefined;
151
153
  } & {
152
154
  [k: string]: unknown;
@@ -162,8 +164,8 @@ export declare const actionTapParamSchema: z.ZodObject<{
162
164
  }[] | undefined;
163
165
  convertHttpImage2Base64?: boolean | undefined;
164
166
  });
165
- cacheable?: boolean | undefined;
166
167
  deepThink?: boolean | undefined;
168
+ cacheable?: boolean | undefined;
167
169
  xpath?: string | boolean | undefined;
168
170
  } & {
169
171
  [k: string]: unknown;
@@ -291,8 +293,8 @@ export declare const actionRightClickParamSchema: z.ZodObject<{
291
293
  }[] | undefined;
292
294
  convertHttpImage2Base64?: boolean | undefined;
293
295
  });
294
- cacheable?: boolean | undefined;
295
296
  deepThink?: boolean | undefined;
297
+ cacheable?: boolean | undefined;
296
298
  xpath?: string | boolean | undefined;
297
299
  } & {
298
300
  [k: string]: unknown;
@@ -308,8 +310,8 @@ export declare const actionRightClickParamSchema: z.ZodObject<{
308
310
  }[] | undefined;
309
311
  convertHttpImage2Base64?: boolean | undefined;
310
312
  });
311
- cacheable?: boolean | undefined;
312
313
  deepThink?: boolean | undefined;
314
+ cacheable?: boolean | undefined;
313
315
  xpath?: string | boolean | undefined;
314
316
  } & {
315
317
  [k: string]: unknown;
@@ -437,8 +439,8 @@ export declare const actionDoubleClickParamSchema: z.ZodObject<{
437
439
  }[] | undefined;
438
440
  convertHttpImage2Base64?: boolean | undefined;
439
441
  });
440
- cacheable?: boolean | undefined;
441
442
  deepThink?: boolean | undefined;
443
+ cacheable?: boolean | undefined;
442
444
  xpath?: string | boolean | undefined;
443
445
  } & {
444
446
  [k: string]: unknown;
@@ -454,8 +456,8 @@ export declare const actionDoubleClickParamSchema: z.ZodObject<{
454
456
  }[] | undefined;
455
457
  convertHttpImage2Base64?: boolean | undefined;
456
458
  });
457
- cacheable?: boolean | undefined;
458
459
  deepThink?: boolean | undefined;
460
+ cacheable?: boolean | undefined;
459
461
  xpath?: string | boolean | undefined;
460
462
  } & {
461
463
  [k: string]: unknown;
@@ -583,8 +585,8 @@ export declare const actionHoverParamSchema: z.ZodObject<{
583
585
  }[] | undefined;
584
586
  convertHttpImage2Base64?: boolean | undefined;
585
587
  });
586
- cacheable?: boolean | undefined;
587
588
  deepThink?: boolean | undefined;
589
+ cacheable?: boolean | undefined;
588
590
  xpath?: string | boolean | undefined;
589
591
  } & {
590
592
  [k: string]: unknown;
@@ -600,8 +602,8 @@ export declare const actionHoverParamSchema: z.ZodObject<{
600
602
  }[] | undefined;
601
603
  convertHttpImage2Base64?: boolean | undefined;
602
604
  });
603
- cacheable?: boolean | undefined;
604
605
  deepThink?: boolean | undefined;
606
+ cacheable?: boolean | undefined;
605
607
  xpath?: string | boolean | undefined;
606
608
  } & {
607
609
  [k: string]: unknown;
@@ -997,7 +999,7 @@ export type ActionKeyboardPressParam = {
997
999
  export declare const defineActionKeyboardPress: (call: (param: ActionKeyboardPressParam) => Promise<void>) => DeviceAction<ActionKeyboardPressParam>;
998
1000
  export declare const actionScrollParamSchema: z.ZodObject<{
999
1001
  direction: z.ZodDefault<z.ZodEnum<["down", "up", "right", "left"]>>;
1000
- scrollType: z.ZodDefault<z.ZodEnum<["once", "untilBottom", "untilTop", "untilRight", "untilLeft"]>>;
1002
+ scrollType: z.ZodDefault<z.ZodEnum<["singleAction", "scrollToBottom", "scrollToTop", "scrollToRight", "scrollToLeft"]>>;
1001
1003
  distance: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
1002
1004
  locate: z.ZodOptional<z.ZodObject<{
1003
1005
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
@@ -1106,8 +1108,8 @@ export declare const actionScrollParamSchema: z.ZodObject<{
1106
1108
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
1107
1109
  }, z.ZodTypeAny, "passthrough">>>;
1108
1110
  }, "strip", z.ZodTypeAny, {
1109
- direction: "left" | "right" | "down" | "up";
1110
- scrollType: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft";
1111
+ direction: "down" | "up" | "right" | "left";
1112
+ scrollType: "singleAction" | "scrollToBottom" | "scrollToTop" | "scrollToRight" | "scrollToLeft";
1111
1113
  locate?: z.objectOutputType<{
1112
1114
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
1113
1115
  prompt: z.ZodString;
@@ -1182,16 +1184,10 @@ export declare const actionScrollParamSchema: z.ZodObject<{
1182
1184
  cacheable: z.ZodOptional<z.ZodBoolean>;
1183
1185
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
1184
1186
  }, z.ZodTypeAny, "passthrough"> | undefined;
1185
- direction?: "left" | "right" | "down" | "up" | undefined;
1186
- scrollType?: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft" | undefined;
1187
+ direction?: "down" | "up" | "right" | "left" | undefined;
1188
+ scrollType?: "singleAction" | "scrollToBottom" | "scrollToTop" | "scrollToRight" | "scrollToLeft" | undefined;
1187
1189
  distance?: number | null | undefined;
1188
1190
  }>;
1189
- export type ActionScrollParam = {
1190
- direction?: 'down' | 'up' | 'right' | 'left';
1191
- scrollType?: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
1192
- distance?: number | null;
1193
- locate?: LocateResultElement;
1194
- };
1195
1191
  export declare const defineActionScroll: (call: (param: ActionScrollParam) => Promise<void>) => DeviceAction<ActionScrollParam>;
1196
1192
  export declare const actionDragAndDropParamSchema: z.ZodObject<{
1197
1193
  from: z.ZodObject<{
@@ -1417,8 +1413,8 @@ export declare const actionDragAndDropParamSchema: z.ZodObject<{
1417
1413
  }[] | undefined;
1418
1414
  convertHttpImage2Base64?: boolean | undefined;
1419
1415
  });
1420
- cacheable?: boolean | undefined;
1421
1416
  deepThink?: boolean | undefined;
1417
+ cacheable?: boolean | undefined;
1422
1418
  xpath?: string | boolean | undefined;
1423
1419
  } & {
1424
1420
  [k: string]: unknown;
@@ -1433,8 +1429,8 @@ export declare const actionDragAndDropParamSchema: z.ZodObject<{
1433
1429
  }[] | undefined;
1434
1430
  convertHttpImage2Base64?: boolean | undefined;
1435
1431
  });
1436
- cacheable?: boolean | undefined;
1437
1432
  deepThink?: boolean | undefined;
1433
+ cacheable?: boolean | undefined;
1438
1434
  xpath?: string | boolean | undefined;
1439
1435
  } & {
1440
1436
  [k: string]: unknown;
@@ -1450,8 +1446,8 @@ export declare const actionDragAndDropParamSchema: z.ZodObject<{
1450
1446
  }[] | undefined;
1451
1447
  convertHttpImage2Base64?: boolean | undefined;
1452
1448
  });
1453
- cacheable?: boolean | undefined;
1454
1449
  deepThink?: boolean | undefined;
1450
+ cacheable?: boolean | undefined;
1455
1451
  xpath?: string | boolean | undefined;
1456
1452
  } & {
1457
1453
  [k: string]: unknown;
@@ -1466,8 +1462,8 @@ export declare const actionDragAndDropParamSchema: z.ZodObject<{
1466
1462
  }[] | undefined;
1467
1463
  convertHttpImage2Base64?: boolean | undefined;
1468
1464
  });
1469
- cacheable?: boolean | undefined;
1470
1465
  deepThink?: boolean | undefined;
1466
+ cacheable?: boolean | undefined;
1471
1467
  xpath?: string | boolean | undefined;
1472
1468
  } & {
1473
1469
  [k: string]: unknown;
@@ -1597,8 +1593,8 @@ export declare const ActionLongPressParamSchema: z.ZodObject<{
1597
1593
  }[] | undefined;
1598
1594
  convertHttpImage2Base64?: boolean | undefined;
1599
1595
  });
1600
- cacheable?: boolean | undefined;
1601
1596
  deepThink?: boolean | undefined;
1597
+ cacheable?: boolean | undefined;
1602
1598
  xpath?: string | boolean | undefined;
1603
1599
  } & {
1604
1600
  [k: string]: unknown;
@@ -1615,8 +1611,8 @@ export declare const ActionLongPressParamSchema: z.ZodObject<{
1615
1611
  }[] | undefined;
1616
1612
  convertHttpImage2Base64?: boolean | undefined;
1617
1613
  });
1618
- cacheable?: boolean | undefined;
1619
1614
  deepThink?: boolean | undefined;
1615
+ cacheable?: boolean | undefined;
1620
1616
  xpath?: string | boolean | undefined;
1621
1617
  } & {
1622
1618
  [k: string]: unknown;
@@ -1847,8 +1843,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
1847
1843
  repeat: z.ZodOptional<z.ZodNumber>;
1848
1844
  }, "strip", z.ZodTypeAny, {
1849
1845
  duration: number;
1850
- repeat?: number | undefined;
1851
- direction?: "left" | "right" | "down" | "up" | undefined;
1846
+ direction?: "down" | "up" | "right" | "left" | undefined;
1852
1847
  distance?: number | undefined;
1853
1848
  start?: z.objectOutputType<{
1854
1849
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
@@ -1922,9 +1917,9 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
1922
1917
  cacheable: z.ZodOptional<z.ZodBoolean>;
1923
1918
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
1924
1919
  }, z.ZodTypeAny, "passthrough"> | undefined;
1925
- }, {
1926
1920
  repeat?: number | undefined;
1927
- direction?: "left" | "right" | "down" | "up" | undefined;
1921
+ }, {
1922
+ direction?: "down" | "up" | "right" | "left" | undefined;
1928
1923
  distance?: number | undefined;
1929
1924
  duration?: number | undefined;
1930
1925
  start?: z.objectInputType<{
@@ -1999,6 +1994,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
1999
1994
  cacheable: z.ZodOptional<z.ZodBoolean>;
2000
1995
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
2001
1996
  }, z.ZodTypeAny, "passthrough"> | undefined;
1997
+ repeat?: number | undefined;
2002
1998
  }>;
2003
1999
  export type ActionSwipeParam = {
2004
2000
  start?: LocateResultElement;
@@ -2127,8 +2123,8 @@ export declare const actionClearInputParamSchema: z.ZodObject<{
2127
2123
  }[] | undefined;
2128
2124
  convertHttpImage2Base64?: boolean | undefined;
2129
2125
  });
2130
- cacheable?: boolean | undefined;
2131
2126
  deepThink?: boolean | undefined;
2127
+ cacheable?: boolean | undefined;
2132
2128
  xpath?: string | boolean | undefined;
2133
2129
  } & {
2134
2130
  [k: string]: unknown;
@@ -2144,8 +2140,8 @@ export declare const actionClearInputParamSchema: z.ZodObject<{
2144
2140
  }[] | undefined;
2145
2141
  convertHttpImage2Base64?: boolean | undefined;
2146
2142
  });
2147
- cacheable?: boolean | undefined;
2148
2143
  deepThink?: boolean | undefined;
2144
+ cacheable?: boolean | undefined;
2149
2145
  xpath?: string | boolean | undefined;
2150
2146
  } & {
2151
2147
  [k: string]: unknown;
@@ -2155,4 +2151,24 @@ export type ActionClearInputParam = {
2155
2151
  locate: LocateResultElement;
2156
2152
  };
2157
2153
  export declare const defineActionClearInput: (call: (param: ActionClearInputParam) => Promise<void>) => DeviceAction<ActionClearInputParam>;
2154
+ export declare const actionAssertParamSchema: z.ZodObject<{
2155
+ condition: z.ZodString;
2156
+ thought: z.ZodString;
2157
+ result: z.ZodBoolean;
2158
+ }, "strip", z.ZodTypeAny, {
2159
+ condition: string;
2160
+ thought: string;
2161
+ result: boolean;
2162
+ }, {
2163
+ condition: string;
2164
+ thought: string;
2165
+ result: boolean;
2166
+ }>;
2167
+ export type ActionAssertParam = {
2168
+ condition: string;
2169
+ thought: string;
2170
+ result: boolean;
2171
+ };
2172
+ export declare const defineActionAssert: () => DeviceAction<ActionAssertParam>;
2158
2173
  export type { DeviceAction } from '../types';
2174
+ export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
@@ -1,12 +1,13 @@
1
1
  import { z } from 'zod';
2
- import { Executor } from './ai-model/action-executor';
3
- import Insight from './insight/index';
2
+ import Service from './service/index';
3
+ import { TaskRunner } from './task-runner';
4
4
  import { getVersion } from './utils';
5
5
  export { plan, describeUserPage, AiLocateElement, getMidsceneLocationSchema, type MidsceneLocationResultType, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, } from './ai-model/index';
6
- export { MIDSCENE_MODEL_NAME } from '@midscene/shared/env';
6
+ export { MIDSCENE_MODEL_NAME, type CreateOpenAIClientFn, } from '@midscene/shared/env';
7
7
  export type * from './types';
8
+ export { ServiceError } from './types';
8
9
  export { z };
9
- export default Insight;
10
- export { Executor, Insight, getVersion };
10
+ export default Service;
11
+ export { TaskRunner, Service, getVersion };
11
12
  export type { MidsceneYamlScript, MidsceneYamlTask, MidsceneYamlFlowItem, MidsceneYamlConfigResult, MidsceneYamlConfig, MidsceneYamlScriptWebEnv, MidsceneYamlScriptAndroidEnv, MidsceneYamlScriptIOSEnv, MidsceneYamlScriptEnv, LocateOption, DetailedLocateParam, } from './yaml';
12
- export { Agent, type AgentOpt, createAgent } from './agent';
13
+ export { Agent, type AgentOpt, type AiActOptions, createAgent } from './agent';
@@ -0,0 +1,26 @@
1
+ import { callAIWithObjectResponse } from '../ai-model/index';
2
+ import type { AIDescribeElementResponse, DetailedLocateParam, LocateResultWithDump, Rect, ServiceExtractOption, ServiceExtractParam, ServiceExtractResult, ServiceTaskInfo, UIContext } from '../types';
3
+ import { type IModelConfig } from '@midscene/shared/env';
4
+ import type { TMultimodalPrompt } from '../common';
5
+ export interface LocateOpts {
6
+ context?: UIContext;
7
+ }
8
+ export type AnyValue<T> = {
9
+ [K in keyof T]: unknown extends T[K] ? any : T[K];
10
+ };
11
+ interface ServiceOptions {
12
+ taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;
13
+ aiVendorFn?: typeof callAIWithObjectResponse;
14
+ }
15
+ export default class Service {
16
+ contextRetrieverFn: () => Promise<UIContext> | UIContext;
17
+ aiVendorFn: Exclude<ServiceOptions['aiVendorFn'], undefined>;
18
+ taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;
19
+ constructor(context: UIContext | (() => Promise<UIContext> | UIContext), opt?: ServiceOptions);
20
+ locate(query: DetailedLocateParam, opt: LocateOpts, modelConfig: IModelConfig): Promise<LocateResultWithDump>;
21
+ extract<T>(dataDemand: ServiceExtractParam, modelConfig: IModelConfig, opt?: ServiceExtractOption, pageDescription?: string, multimodalPrompt?: TMultimodalPrompt): Promise<ServiceExtractResult<T>>;
22
+ describe(target: Rect | [number, number], modelConfig: IModelConfig, opt?: {
23
+ deepThink?: boolean;
24
+ }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
25
+ }
26
+ export {};
@@ -0,0 +1,2 @@
1
+ import type { PartialServiceDumpFromSDK, ServiceDump } from '../types';
2
+ export declare function createServiceDump(data: PartialServiceDumpFromSDK): ServiceDump;
@@ -0,0 +1,49 @@
1
+ import type { ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, UIContext } from './types';
2
+ type TaskRunnerInitOptions = ExecutionTaskProgressOptions & {
3
+ tasks?: ExecutionTaskApply[];
4
+ onTaskUpdate?: (runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void;
5
+ };
6
+ type TaskRunnerOperationOptions = {
7
+ allowWhenError?: boolean;
8
+ };
9
+ export declare class TaskRunner {
10
+ name: string;
11
+ tasks: ExecutionTask[];
12
+ status: 'init' | 'pending' | 'running' | 'completed' | 'error';
13
+ onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
14
+ private readonly uiContextBuilder;
15
+ private readonly onTaskUpdate?;
16
+ constructor(name: string, uiContextBuilder: () => Promise<UIContext>, options?: TaskRunnerInitOptions);
17
+ private emitOnTaskUpdate;
18
+ private lastUiContext?;
19
+ private getUiContext;
20
+ private captureScreenshot;
21
+ private attachRecorderItem;
22
+ private markTaskAsPending;
23
+ private normalizeStatusFromError;
24
+ private findPreviousNonSubTaskUIContext;
25
+ append(task: ExecutionTaskApply[] | ExecutionTaskApply, options?: TaskRunnerOperationOptions): Promise<void>;
26
+ appendAndFlush(task: ExecutionTaskApply[] | ExecutionTaskApply, options?: TaskRunnerOperationOptions): Promise<{
27
+ output: any;
28
+ thought?: string;
29
+ } | undefined>;
30
+ flush(options?: TaskRunnerOperationOptions): Promise<{
31
+ output: any;
32
+ thought?: string;
33
+ } | undefined>;
34
+ isInErrorState(): boolean;
35
+ latestErrorTask(): ExecutionTask | null;
36
+ dump(): ExecutionDump;
37
+ appendErrorPlan(errorMsg: string): Promise<{
38
+ output: undefined;
39
+ runner: TaskRunner;
40
+ }>;
41
+ }
42
+ export declare class TaskExecutionError extends Error {
43
+ runner: TaskRunner;
44
+ errorTask: ExecutionTask | null;
45
+ constructor(message: string, runner: TaskRunner, errorTask: ExecutionTask | null, options?: {
46
+ cause?: unknown;
47
+ });
48
+ }
49
+ export {};
@@ -1 +1,4 @@
1
- export { truncateText, trimAttributes, descriptionOfTree, } from '@midscene/shared/extractor';
1
+ import type { BaseElement, ElementTreeNode } from '@midscene/shared/types';
2
+ import { trimAttributes, truncateText } from '@midscene/shared/extractor';
3
+ export { trimAttributes, truncateText };
4
+ export declare function descriptionOfTree<ElementType extends BaseElement = BaseElement>(tree: ElementTreeNode<ElementType>, truncateTextLength?: number, filterNonTextContent?: boolean, visibleOnly?: boolean): string;