@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,288 @@
1
+ import type { BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from './types';
2
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
3
+ import type { PlanningLocateParam } from './types';
4
+ import type { TModelFamily } from '@godscene/shared/env';
5
+ import { z } from 'zod';
6
+ export type AIArgs = ChatCompletionMessageParam[];
7
+ type AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];
8
+ /**
9
+ * Convert a point coordinate [0, 1000] to a small bbox [0, 1000]
10
+ * Creates a small bbox around the center point in the same coordinate space
11
+ *
12
+ * @param x - X coordinate in [0, 1000] range
13
+ * @param y - Y coordinate in [0, 1000] range
14
+ * @param bboxSize - Size of the bbox to create (default: 20)
15
+ * @returns [x1, y1, x2, y2] bbox in [0, 1000] coordinate space
16
+ */
17
+ export declare function pointToBbox(x: number, y: number, bboxSize?: number): [number, number, number, number];
18
+ export declare function fillBboxParam(locate: PlanningLocateParam, width: number, height: number, modelFamily: TModelFamily | undefined): PlanningLocateParam;
19
+ export declare function adaptQwen2_5Bbox(bbox: number[]): [number, number, number, number];
20
+ export declare function adaptGpt5Bbox(bbox: number[] | string[] | string): [number, number, number, number];
21
+ export declare function adaptDoubaoBbox(bbox: string[] | number[] | string, width: number, height: number): [number, number, number, number];
22
+ export declare function adaptBbox(bbox: AdaptBboxInput, width: number, height: number, modelFamily: TModelFamily | undefined): [number, number, number, number];
23
+ export declare function normalized01000(bbox: number[], width: number, height: number): [number, number, number, number];
24
+ export declare function adaptGeminiBbox(bbox: number[], width: number, height: number): [number, number, number, number];
25
+ export declare function adaptBboxToRect(bbox: number[], width: number, height: number, offsetX?: number, offsetY?: number, rightLimit?: number, bottomLimit?: number, modelFamily?: TModelFamily | undefined, scale?: number): Rect;
26
+ export declare function mergeRects(rects: Rect[]): {
27
+ left: number;
28
+ top: number;
29
+ width: number;
30
+ height: number;
31
+ };
32
+ /**
33
+ * Expand the search area to at least 400 x 400 pixels
34
+ *
35
+ * Step 1: Extend 100px on each side (top, right, bottom, left)
36
+ * - If the element is near a boundary, expansion on that side will be limited
37
+ * - No compensation is made for boundary limitations (this is intentional)
38
+ *
39
+ * Step 2: Ensure the area is at least 400x400 pixels
40
+ * - Scale up proportionally from the center if needed
41
+ * - Final result is clamped to screen boundaries
42
+ */
43
+ export declare function expandSearchArea(rect: Rect, screenSize: Size): Rect;
44
+ export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
45
+ export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[]): MidsceneYamlFlowItem[];
46
+ export declare const PointSchema: z.ZodObject<{
47
+ left: z.ZodNumber;
48
+ top: z.ZodNumber;
49
+ }, "strip", z.ZodTypeAny, {
50
+ left: number;
51
+ top: number;
52
+ }, {
53
+ left: number;
54
+ top: number;
55
+ }>;
56
+ export declare const SizeSchema: z.ZodObject<{
57
+ width: z.ZodNumber;
58
+ height: z.ZodNumber;
59
+ }, "strip", z.ZodTypeAny, {
60
+ width: number;
61
+ height: number;
62
+ }, {
63
+ width: number;
64
+ height: number;
65
+ }>;
66
+ export declare const RectSchema: z.ZodIntersection<z.ZodIntersection<z.ZodObject<{
67
+ left: z.ZodNumber;
68
+ top: z.ZodNumber;
69
+ }, "strip", z.ZodTypeAny, {
70
+ left: number;
71
+ top: number;
72
+ }, {
73
+ left: number;
74
+ top: number;
75
+ }>, z.ZodObject<{
76
+ width: z.ZodNumber;
77
+ height: z.ZodNumber;
78
+ }, "strip", z.ZodTypeAny, {
79
+ width: number;
80
+ height: number;
81
+ }, {
82
+ width: number;
83
+ height: number;
84
+ }>>, z.ZodObject<{
85
+ zoom: z.ZodOptional<z.ZodNumber>;
86
+ }, "strip", z.ZodTypeAny, {
87
+ zoom?: number | undefined;
88
+ }, {
89
+ zoom?: number | undefined;
90
+ }>>;
91
+ export declare const TMultimodalPromptSchema: z.ZodObject<{
92
+ images: z.ZodOptional<z.ZodArray<z.ZodObject<{
93
+ name: z.ZodString;
94
+ url: z.ZodString;
95
+ }, "strip", z.ZodTypeAny, {
96
+ name: string;
97
+ url: string;
98
+ }, {
99
+ name: string;
100
+ url: string;
101
+ }>, "many">>;
102
+ convertHttpImage2Base64: z.ZodOptional<z.ZodBoolean>;
103
+ }, "strip", z.ZodTypeAny, {
104
+ images?: {
105
+ name: string;
106
+ url: string;
107
+ }[] | undefined;
108
+ convertHttpImage2Base64?: boolean | undefined;
109
+ }, {
110
+ images?: {
111
+ name: string;
112
+ url: string;
113
+ }[] | undefined;
114
+ convertHttpImage2Base64?: boolean | undefined;
115
+ }>;
116
+ export declare const TUserPromptSchema: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
117
+ prompt: z.ZodString;
118
+ }, "strip", z.ZodTypeAny, {
119
+ prompt: string;
120
+ }, {
121
+ prompt: string;
122
+ }>, z.ZodObject<{
123
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
124
+ name: z.ZodString;
125
+ url: z.ZodString;
126
+ }, "strip", z.ZodTypeAny, {
127
+ name: string;
128
+ url: string;
129
+ }, {
130
+ name: string;
131
+ url: string;
132
+ }>, "many">>>;
133
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
134
+ }, "strip", z.ZodTypeAny, {
135
+ images?: {
136
+ name: string;
137
+ url: string;
138
+ }[] | undefined;
139
+ convertHttpImage2Base64?: boolean | undefined;
140
+ }, {
141
+ images?: {
142
+ name: string;
143
+ url: string;
144
+ }[] | undefined;
145
+ convertHttpImage2Base64?: boolean | undefined;
146
+ }>>]>;
147
+ export type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;
148
+ export type TUserPrompt = z.infer<typeof TUserPromptSchema>;
149
+ /**
150
+ * Returns the schema for locator fields.
151
+ * This now returns the input schema, which is more permissive and suitable for validation.
152
+ */
153
+ export declare const getMidsceneLocationSchema: () => z.ZodObject<{
154
+ prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
155
+ prompt: z.ZodString;
156
+ }, "strip", z.ZodTypeAny, {
157
+ prompt: string;
158
+ }, {
159
+ prompt: string;
160
+ }>, z.ZodObject<{
161
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
162
+ name: z.ZodString;
163
+ url: z.ZodString;
164
+ }, "strip", z.ZodTypeAny, {
165
+ name: string;
166
+ url: string;
167
+ }, {
168
+ name: string;
169
+ url: string;
170
+ }>, "many">>>;
171
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
172
+ }, "strip", z.ZodTypeAny, {
173
+ images?: {
174
+ name: string;
175
+ url: string;
176
+ }[] | undefined;
177
+ convertHttpImage2Base64?: boolean | undefined;
178
+ }, {
179
+ images?: {
180
+ name: string;
181
+ url: string;
182
+ }[] | undefined;
183
+ convertHttpImage2Base64?: boolean | undefined;
184
+ }>>]>;
185
+ deepLocate: z.ZodOptional<z.ZodBoolean>;
186
+ deepThink: z.ZodOptional<z.ZodBoolean>;
187
+ cacheable: z.ZodOptional<z.ZodBoolean>;
188
+ xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
189
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
190
+ prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
191
+ prompt: z.ZodString;
192
+ }, "strip", z.ZodTypeAny, {
193
+ prompt: string;
194
+ }, {
195
+ prompt: string;
196
+ }>, z.ZodObject<{
197
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
198
+ name: z.ZodString;
199
+ url: z.ZodString;
200
+ }, "strip", z.ZodTypeAny, {
201
+ name: string;
202
+ url: string;
203
+ }, {
204
+ name: string;
205
+ url: string;
206
+ }>, "many">>>;
207
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
208
+ }, "strip", z.ZodTypeAny, {
209
+ images?: {
210
+ name: string;
211
+ url: string;
212
+ }[] | undefined;
213
+ convertHttpImage2Base64?: boolean | undefined;
214
+ }, {
215
+ images?: {
216
+ name: string;
217
+ url: string;
218
+ }[] | undefined;
219
+ convertHttpImage2Base64?: boolean | undefined;
220
+ }>>]>;
221
+ deepLocate: z.ZodOptional<z.ZodBoolean>;
222
+ deepThink: z.ZodOptional<z.ZodBoolean>;
223
+ cacheable: z.ZodOptional<z.ZodBoolean>;
224
+ xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
225
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
226
+ prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
227
+ prompt: z.ZodString;
228
+ }, "strip", z.ZodTypeAny, {
229
+ prompt: string;
230
+ }, {
231
+ prompt: string;
232
+ }>, z.ZodObject<{
233
+ images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
234
+ name: z.ZodString;
235
+ url: z.ZodString;
236
+ }, "strip", z.ZodTypeAny, {
237
+ name: string;
238
+ url: string;
239
+ }, {
240
+ name: string;
241
+ url: string;
242
+ }>, "many">>>;
243
+ convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
244
+ }, "strip", z.ZodTypeAny, {
245
+ images?: {
246
+ name: string;
247
+ url: string;
248
+ }[] | undefined;
249
+ convertHttpImage2Base64?: boolean | undefined;
250
+ }, {
251
+ images?: {
252
+ name: string;
253
+ url: string;
254
+ }[] | undefined;
255
+ convertHttpImage2Base64?: boolean | undefined;
256
+ }>>]>;
257
+ deepLocate: z.ZodOptional<z.ZodBoolean>;
258
+ deepThink: z.ZodOptional<z.ZodBoolean>;
259
+ cacheable: z.ZodOptional<z.ZodBoolean>;
260
+ xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
261
+ }, z.ZodTypeAny, "passthrough">>;
262
+ export declare const ifMidsceneLocatorField: (field: any) => boolean;
263
+ export declare const dumpMidsceneLocatorField: (field: any) => string;
264
+ export declare const findAllMidsceneLocatorField: (zodType?: z.ZodType<any>, requiredOnly?: boolean) => string[];
265
+ export declare const dumpActionParam: (jsonObject: Record<string, any>, zodSchema: z.ZodType<any>) => Record<string, any>;
266
+ /**
267
+ * Parse and validate action parameters using Zod schema.
268
+ * All fields are validated through Zod, EXCEPT locator fields, which are skipped.
269
+ * Default values defined in the schema are automatically applied.
270
+ *
271
+ * Locator fields are special business logic fields with complex validation requirements,
272
+ * so they are intentionally excluded from Zod parsing and use existing validation logic.
273
+ *
274
+ * When shrunkShotToLogicalRatio is provided and !== 1, coordinates in locate fields
275
+ * are transformed from screenshot space to logical space.
276
+ */
277
+ export declare const parseActionParam: (rawParam: Record<string, any> | undefined, zodSchema?: z.ZodType<any>, options?: {
278
+ shrunkShotToLogicalRatio?: number;
279
+ }) => Record<string, any> | undefined;
280
+ export declare const finalizeActionName = "Finalize";
281
+ /**
282
+ * Get a readable time string for a given timestamp or the current time
283
+ * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'
284
+ * @param timestamp - Optional timestamp in milliseconds. If not provided, uses current system time.
285
+ * @returns A formatted time string with format label
286
+ */
287
+ export declare const getReadableTimeString: (format?: string, timestamp?: number) => string;
288
+ export {};
@@ -0,0 +1,155 @@
1
+ import type { DeviceAction } from '../types';
2
+ /**
3
+ * Android device input options
4
+ */
5
+ export type AndroidDeviceInputOpt = {
6
+ /** Automatically dismiss the keyboard after input is completed */
7
+ autoDismissKeyboard?: boolean;
8
+ /** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
9
+ keyboardDismissStrategy?: 'esc-first' | 'back-first';
10
+ };
11
+ /**
12
+ * Android device options
13
+ */
14
+ export type AndroidDeviceOpt = {
15
+ /** Path to the ADB executable */
16
+ androidAdbPath?: string;
17
+ /** Remote ADB host address */
18
+ remoteAdbHost?: string;
19
+ /** Remote ADB port */
20
+ remoteAdbPort?: number;
21
+ /** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
22
+ imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
23
+ /** Display ID to use for this device */
24
+ displayId?: number;
25
+ /** Use physical display ID for screenshot operations */
26
+ usePhysicalDisplayIdForScreenshot?: boolean;
27
+ /** Use physical display ID when looking up display information */
28
+ usePhysicalDisplayIdForDisplayLookup?: boolean;
29
+ /** Custom device actions to register */
30
+ customActions?: DeviceAction<any>[];
31
+ /**
32
+ * @deprecated This option has been removed and no longer has any effect.
33
+ * Use `screenshotShrinkFactor` in AgentOpt instead to control screenshot size sent to AI model.
34
+ */
35
+ screenshotResizeScale?: number;
36
+ /** Always fetch screen info on each call; if false, cache the first result */
37
+ alwaysRefreshScreenInfo?: boolean;
38
+ /**
39
+ * Screenshot buffer size validation threshold in bytes. Buffers below this
40
+ * value are treated as failed or corrupted captures. Defaults to 1024 (1KB).
41
+ * Set to 0 to skip only this size check; empty-buffer and image-format
42
+ * validation still run.
43
+ */
44
+ minScreenshotBufferSize?: number;
45
+ /**
46
+ * Scrcpy screenshot configuration for high-performance screen capture.
47
+ *
48
+ * Scrcpy provides 6-8x faster screenshots by streaming H.264 video from the device.
49
+ * When enabled, scrcpy will:
50
+ * 1. Start a video stream from the device on first screenshot request
51
+ * 2. Keep the connection alive for subsequent screenshots (16-50ms each)
52
+ * 3. Automatically disconnect after idle timeout to save resources
53
+ * 4. Fall back to standard ADB mode if scrcpy is unavailable
54
+ *
55
+ * @example
56
+ * ```typescript
57
+ * // Enable scrcpy for high-performance screenshots
58
+ * const device = new AndroidDevice(deviceId, {
59
+ * scrcpyConfig: {
60
+ * enabled: true,
61
+ * },
62
+ * });
63
+ *
64
+ * // Custom configuration
65
+ * const device = new AndroidDevice(deviceId, {
66
+ * scrcpyConfig: {
67
+ * enabled: true,
68
+ * maxSize: 0, // 0 = no scaling
69
+ * idleTimeoutMs: 30000,
70
+ * videoBitRate: 8_000_000,
71
+ * },
72
+ * });
73
+ * ```
74
+ */
75
+ scrcpyConfig?: {
76
+ /**
77
+ * Enable scrcpy for high-performance screenshots.
78
+ * @default false
79
+ */
80
+ enabled?: boolean;
81
+ /**
82
+ * Maximum video dimension (width or height).
83
+ * Video stream will be scaled down if device resolution exceeds this value.
84
+ * Lower values reduce bandwidth but may affect image quality.
85
+ *
86
+ * @default 0 (no scaling, use original resolution)
87
+ * @example
88
+ * { maxSize: 1024 } // Scale down to 1024 when the device resolution exceeds it
89
+ */
90
+ maxSize?: number;
91
+ /**
92
+ * Idle timeout in milliseconds before disconnecting scrcpy.
93
+ * Connection auto-closes after this period of inactivity to save resources.
94
+ * Set to 0 to disable auto-disconnect.
95
+ * @default 30000 (30 seconds)
96
+ */
97
+ idleTimeoutMs?: number;
98
+ /**
99
+ * Video bit rate for H.264 encoding in bits per second.
100
+ * Higher values improve quality but increase bandwidth usage.
101
+ * @default 2000000 (2 Mbps)
102
+ */
103
+ videoBitRate?: number;
104
+ };
105
+ } & AndroidDeviceInputOpt;
106
+ /**
107
+ * iOS device input options
108
+ */
109
+ export type IOSDeviceInputOpt = {
110
+ /** Automatically dismiss the keyboard after input is completed */
111
+ autoDismissKeyboard?: boolean;
112
+ };
113
+ /**
114
+ * iOS device options
115
+ */
116
+ export type IOSDeviceOpt = {
117
+ /** Device ID (UDID) to connect to */
118
+ deviceId?: string;
119
+ /**
120
+ * Optional npm module path used to override the default iOS device implementation.
121
+ * The target module must export an `IOSDevice` class (or default export) compatible with Midscene's iOS device interface.
122
+ */
123
+ iOSDeviceClassOverride?: string;
124
+ /** Custom device actions to register */
125
+ customActions?: DeviceAction<any>[];
126
+ /** WebDriverAgent port (default: 8100) */
127
+ wdaPort?: number;
128
+ /** WebDriverAgent host (default: 'localhost') */
129
+ wdaHost?: string;
130
+ /** Whether to use WebDriverAgent */
131
+ useWDA?: boolean;
132
+ /** WDA MJPEG server port for real-time screen streaming (default: 9100) */
133
+ wdaMjpegPort?: number;
134
+ } & IOSDeviceInputOpt;
135
+ /**
136
+ * HarmonyOS device input options
137
+ */
138
+ export type HarmonyDeviceInputOpt = {
139
+ /** Automatically dismiss the keyboard after input is completed */
140
+ autoDismissKeyboard?: boolean;
141
+ };
142
+ /**
143
+ * HarmonyOS device options
144
+ */
145
+ export type HarmonyDeviceOpt = {
146
+ /** Path to the HDC executable */
147
+ hdcPath?: string;
148
+ /** Custom device actions to register */
149
+ customActions?: DeviceAction<any>[];
150
+ /**
151
+ * @deprecated This option has been removed and no longer has any effect.
152
+ * Use `screenshotShrinkFactor` in AgentOpt instead to control screenshot size sent to AI model.
153
+ */
154
+ screenshotResizeScale?: number;
155
+ } & HarmonyDeviceInputOpt;