@midscene/core 0.26.5-beta-20250814095614.0 → 0.26.5-beta-20250814125155.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/dist/es/ai-model/action-executor.mjs +139 -0
  2. package/dist/es/ai-model/action-executor.mjs.map +1 -0
  3. package/dist/es/ai-model/common.mjs +219 -0
  4. package/dist/es/ai-model/common.mjs.map +1 -0
  5. package/dist/es/ai-model/index.mjs +10 -0
  6. package/dist/es/ai-model/inspect.mjs +317 -0
  7. package/dist/es/ai-model/inspect.mjs.map +1 -0
  8. package/dist/es/ai-model/llm-planning.mjs +85 -0
  9. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  10. package/dist/es/ai-model/prompt/assertion.mjs +55 -0
  11. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
  12. package/dist/es/ai-model/prompt/common.mjs +7 -0
  13. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  14. package/dist/es/ai-model/prompt/describe.mjs +44 -0
  15. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  16. package/dist/es/ai-model/prompt/extraction.mjs +137 -0
  17. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  18. package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
  19. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  20. package/dist/es/ai-model/prompt/llm-planning.mjs +359 -0
  21. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  22. package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
  23. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  24. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  25. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  26. package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
  27. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
  28. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  29. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  30. package/dist/es/ai-model/prompt/util.mjs +123 -0
  31. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  32. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  34. package/dist/es/ai-model/service-caller/index.mjs +413 -0
  35. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  36. package/dist/es/ai-model/ui-tars-planning.mjs +235 -0
  37. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  38. package/dist/es/image/index.mjs +2 -0
  39. package/dist/es/index.mjs +7 -2360
  40. package/dist/es/index.mjs.map +1 -1
  41. package/dist/es/insight/index.mjs +261 -0
  42. package/dist/es/insight/index.mjs.map +1 -0
  43. package/dist/es/insight/utils.mjs +19 -0
  44. package/dist/es/insight/utils.mjs.map +1 -0
  45. package/dist/es/types.mjs +11 -0
  46. package/dist/es/types.mjs.map +1 -0
  47. package/dist/es/utils.mjs +2 -2
  48. package/dist/es/yaml.mjs +0 -0
  49. package/dist/lib/ai-model/action-executor.js +173 -0
  50. package/dist/lib/ai-model/action-executor.js.map +1 -0
  51. package/dist/lib/ai-model/common.js +289 -0
  52. package/dist/lib/ai-model/common.js.map +1 -0
  53. package/dist/lib/ai-model/index.js +103 -0
  54. package/dist/lib/ai-model/index.js.map +1 -0
  55. package/dist/lib/ai-model/inspect.js +360 -0
  56. package/dist/lib/ai-model/inspect.js.map +1 -0
  57. package/dist/lib/ai-model/llm-planning.js +119 -0
  58. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  59. package/dist/lib/ai-model/prompt/assertion.js +92 -0
  60. package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
  61. package/dist/lib/ai-model/prompt/common.js +41 -0
  62. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  63. package/dist/lib/ai-model/prompt/describe.js +78 -0
  64. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  65. package/dist/lib/ai-model/prompt/extraction.js +177 -0
  66. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  67. package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
  68. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  69. package/dist/lib/ai-model/prompt/llm-planning.js +415 -0
  70. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  71. package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
  72. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  73. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  74. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  75. package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
  76. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
  77. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  78. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  79. package/dist/lib/ai-model/prompt/util.js +175 -0
  80. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  81. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  82. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  83. package/dist/lib/ai-model/service-caller/index.js +496 -0
  84. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  85. package/dist/lib/ai-model/ui-tars-planning.js +272 -0
  86. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  87. package/dist/lib/image/index.js +56 -0
  88. package/dist/lib/image/index.js.map +1 -0
  89. package/dist/lib/index.js +21 -2393
  90. package/dist/lib/index.js.map +1 -1
  91. package/dist/lib/insight/index.js +295 -0
  92. package/dist/lib/insight/index.js.map +1 -0
  93. package/dist/lib/insight/utils.js +53 -0
  94. package/dist/lib/insight/utils.js.map +1 -0
  95. package/dist/lib/types.js +82 -0
  96. package/dist/lib/types.js.map +1 -0
  97. package/dist/lib/utils.js +2 -2
  98. package/dist/lib/yaml.js +20 -0
  99. package/dist/lib/yaml.js.map +1 -0
  100. package/dist/types/ai-model/action-executor.d.ts +19 -0
  101. package/dist/types/ai-model/common.d.ts +34 -0
  102. package/dist/types/ai-model/index.d.ts +11 -0
  103. package/dist/types/ai-model/inspect.d.ts +49 -0
  104. package/dist/types/ai-model/llm-planning.d.ts +10 -0
  105. package/dist/types/ai-model/prompt/assertion.d.ts +5 -0
  106. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  107. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  108. package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
  109. package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
  110. package/dist/types/ai-model/prompt/llm-planning.d.ts +15 -0
  111. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
  112. package/dist/types/ai-model/prompt/playwright-generator.d.ts +25 -0
  113. package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
  114. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  115. package/dist/types/ai-model/prompt/util.d.ts +45 -0
  116. package/dist/types/ai-model/prompt/yaml-generator.d.ts +99 -0
  117. package/dist/types/ai-model/service-caller/index.d.ts +26 -0
  118. package/dist/types/ai-model/ui-tars-planning.d.ts +76 -0
  119. package/dist/types/image/index.d.ts +1 -0
  120. package/dist/types/index.d.ts +9 -1289
  121. package/dist/types/insight/index.d.ts +26 -0
  122. package/dist/types/insight/utils.d.ts +2 -0
  123. package/dist/types/tree.d.ts +1 -11
  124. package/dist/types/types.d.ts +399 -0
  125. package/dist/types/utils.d.ts +27 -47
  126. package/dist/types/yaml.d.ts +172 -0
  127. package/package.json +6 -6
  128. package/dist/es/ai-model.mjs +0 -2502
  129. package/dist/es/ai-model.mjs.map +0 -1
  130. package/dist/lib/ai-model.js +0 -2622
  131. package/dist/lib/ai-model.js.map +0 -1
  132. package/dist/types/ai-model.d.ts +0 -596
@@ -1,596 +0,0 @@
1
- import { actionParser } from '@ui-tars/action-parser';
2
- import { BaseElement } from '@midscene/shared/types';
3
- import { ChatCompletionMessageParam } from 'openai/resources/index';
4
- import type { ChatCompletionSystemMessageParam } from 'openai/resources/index';
5
- import type { ChatCompletionUserMessageParam } from 'openai/resources/index';
6
- import { ElementTreeNode } from '@midscene/shared/types';
7
- import type { NodeType } from '@midscene/shared/constants';
8
- import OpenAI from 'openai';
9
- import { Rect } from '@midscene/shared/types';
10
- import { Size } from '@midscene/shared/types';
11
- import type { vlLocateMode } from '@midscene/shared/env';
12
-
13
- export declare function adaptBboxToRect(bbox: number[], width: number, height: number, offsetX?: number, offsetY?: number): Rect;
14
-
15
- export declare enum AIActionType {
16
- ASSERT = 0,
17
- INSPECT_ELEMENT = 1,
18
- EXTRACT_DATA = 2,
19
- PLAN = 3,
20
- DESCRIBE_ELEMENT = 4
21
- }
22
-
23
- export declare type AIArgs = [
24
- ChatCompletionSystemMessageParam,
25
- ...ChatCompletionUserMessageParam[]
26
- ];
27
-
28
- export declare function AiAssert<ElementType extends BaseElement = BaseElement>(options: {
29
- assertion: TUserPrompt;
30
- context: UIContext<ElementType>;
31
- }): Promise<{
32
- content: AIAssertionResponse;
33
- usage: AIUsageInfo | undefined;
34
- }>;
35
-
36
- declare interface AIAssertionResponse {
37
- pass: boolean;
38
- thought: string;
39
- }
40
-
41
- declare interface AIDataExtractionResponse<DataDemand> {
42
- data: DataDemand;
43
- errors?: string[];
44
- thought?: string;
45
- }
46
-
47
- declare interface AIElementCoordinatesResponse {
48
- bbox: [number, number, number, number];
49
- isOrderSensitive?: boolean;
50
- errors?: string[];
51
- }
52
-
53
- declare interface AIElementLocatorResponse {
54
- elements: {
55
- id: string;
56
- reason?: string;
57
- text?: string;
58
- xpaths?: string[];
59
- }[];
60
- bbox?: [number, number, number, number];
61
- isOrderSensitive?: boolean;
62
- errors?: string[];
63
- }
64
-
65
- declare type AIElementResponse =
66
- | AIElementLocatorResponse
67
- | AIElementCoordinatesResponse;
68
-
69
- export declare function AiExtractElementInfo<T, ElementType extends BaseElement = BaseElement>(options: {
70
- dataQuery: string | Record<string, string>;
71
- multimodalPrompt?: TMultimodalPrompt;
72
- context: UIContext<ElementType>;
73
- extractOption?: InsightExtractOption;
74
- }): Promise<{
75
- parseResult: AIDataExtractionResponse<T>;
76
- elementById: (idOrIndexId: string) => ElementType;
77
- usage: AIUsageInfo | undefined;
78
- }>;
79
-
80
- export declare function AiLocateElement<ElementType extends BaseElement = BaseElement>(options: {
81
- context: UIContext<ElementType>;
82
- targetElementDescription: TUserPrompt;
83
- referenceImage?: ReferenceImage;
84
- callAI?: typeof callAiFn<AIElementResponse | [number, number]>;
85
- searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
86
- }): Promise<{
87
- parseResult: AIElementLocatorResponse;
88
- rect?: Rect;
89
- rawResponse: string;
90
- elementById: ElementById;
91
- usage?: AIUsageInfo;
92
- isOrderSensitive?: boolean;
93
- }>;
94
-
95
- export declare function AiLocateSection(options: {
96
- context: UIContext<BaseElement>;
97
- sectionDescription: TUserPrompt;
98
- callAI?: typeof callAiFn<AISectionLocatorResponse>;
99
- }): Promise<{
100
- rect?: Rect;
101
- imageBase64?: string;
102
- error?: string;
103
- rawResponse: string;
104
- usage?: AIUsageInfo;
105
- }>;
106
-
107
- declare interface AISectionLocatorResponse {
108
- bbox: [number, number, number, number];
109
- references_bbox?: [number, number, number, number][];
110
- error?: string;
111
- }
112
-
113
- declare type AIUsageInfo = Record<string, any> & {
114
- prompt_tokens: number | undefined;
115
- completion_tokens: number | undefined;
116
- total_tokens: number | undefined;
117
- time_cost: number | undefined;
118
- };
119
-
120
- export declare function callAi(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, responseFormat?: OpenAI.ChatCompletionCreateParams['response_format'] | OpenAI.ResponseFormatJSONObject, options?: {
121
- stream?: boolean;
122
- onChunk?: StreamingCallback;
123
- }): Promise<{
124
- content: string;
125
- usage?: AIUsageInfo;
126
- isStreamed: boolean;
127
- }>;
128
-
129
- export declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
130
- content: T;
131
- usage?: AIUsageInfo;
132
- }>;
133
-
134
- export declare function callAiFnWithStringResponse<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
135
- content: string;
136
- usage?: AIUsageInfo;
137
- }>;
138
-
139
- export declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
140
- content: T;
141
- usage?: AIUsageInfo;
142
- }>;
143
-
144
- export { ChatCompletionMessageParam }
145
-
146
- declare interface ChromeRecordedEvent {
147
- type: string;
148
- timestamp: number;
149
- url?: string;
150
- title?: string;
151
- elementDescription?: string;
152
- value?: string;
153
- pageInfo?: any;
154
- elementRect?: any;
155
- screenshotBefore?: string;
156
- screenshotAfter?: string;
157
- screenshotWithBox?: string;
158
- }
159
-
160
- declare interface CodeGenerationChunk {
161
- /** The incremental content chunk */
162
- content: string;
163
- /** The reasoning content */
164
- reasoning_content: string;
165
- /** The accumulated content so far */
166
- accumulated: string;
167
- /** Whether this is the final chunk */
168
- isComplete: boolean;
169
- /** Token usage information if available */
170
- usage?: AIUsageInfo;
171
- }
172
-
173
- export declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext<ElementType>, 'describer'>, opt?: {
174
- truncateTextLength?: number;
175
- filterNonTextContent?: boolean;
176
- domIncluded?: boolean | 'visible-only';
177
- visibleOnly?: boolean;
178
- }): Promise<{
179
- description: string;
180
- elementById(idOrIndexId: string): ElementType;
181
- elementByPosition(position: {
182
- x: number;
183
- y: number;
184
- }, size: {
185
- width: number;
186
- height: number;
187
- }): BaseElement | undefined;
188
- insertElementByPosition(position: {
189
- x: number;
190
- y: number;
191
- }): ElementType;
192
- size: {
193
- width: number;
194
- height: number;
195
- };
196
- }>;
197
-
198
- declare interface DetailedLocateParam extends LocateOption {
199
- prompt: TUserPrompt;
200
- referenceImage?: ReferenceImage;
201
- }
202
-
203
- declare interface DeviceAction<ParamType = any> {
204
- name: string;
205
- description?: string;
206
- paramSchema?: string;
207
- paramDescription?: string;
208
- location?: 'required' | 'optional' | false;
209
- whatToLocate?: string; // what to locate if location is required or optional
210
- call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;
211
- }
212
-
213
- declare type ElementById = (id: string) => BaseElement | null;
214
-
215
- export declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNode<BaseElement>, position: {
216
- x: number;
217
- y: number;
218
- }, options?: {
219
- requireStrictDistance?: boolean;
220
- filterPositionElements?: boolean;
221
- }): BaseElement | undefined;
222
-
223
- declare interface ExecutionRecorderItem {
224
- type: 'screenshot';
225
- ts: number;
226
- screenshot?: string;
227
- timing?: string;
228
- }
229
-
230
- declare type ExecutionTask<
231
- E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<
232
- any,
233
- any,
234
- any
235
- >,
236
- > = E &
237
- ExecutionTaskReturn<
238
- E extends ExecutionTaskApply<any, any, infer TaskOutput, any>
239
- ? TaskOutput
240
- : unknown,
241
- E extends ExecutionTaskApply<any, any, any, infer TaskLog>
242
- ? TaskLog
243
- : unknown
244
- > & {
245
- status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
246
- error?: Error;
247
- errorMessage?: string;
248
- errorStack?: string;
249
- timing?: {
250
- start: number;
251
- end?: number;
252
- cost?: number;
253
- };
254
- usage?: AIUsageInfo;
255
- };
256
-
257
- declare interface ExecutionTaskApply<
258
- Type extends ExecutionTaskType = any,
259
- TaskParam = any,
260
- TaskOutput = any,
261
- TaskLog = any,
262
- > {
263
- type: Type;
264
- subType?: string;
265
- param?: TaskParam;
266
- thought?: string;
267
- locate?: PlanningLocateParam | null;
268
- pageContext?: UIContext;
269
- executor: (
270
- param: TaskParam,
271
- context: ExecutorContext,
272
- ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>
273
- | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>
274
- | undefined
275
- | void;
276
- }
277
-
278
- declare interface ExecutionTaskHitBy {
279
- from: string;
280
- context: Record<string, any>;
281
- }
282
-
283
- declare interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
284
- output?: TaskOutput;
285
- log?: TaskLog;
286
- recorder?: ExecutionRecorderItem[];
287
- hitBy?: ExecutionTaskHitBy;
288
- }
289
-
290
- declare type ExecutionTaskType =
291
- | 'Planning'
292
- | 'Insight'
293
- | 'Action'
294
- | 'Assertion'
295
- | 'Log';
296
-
297
- declare interface ExecutorContext {
298
- task: ExecutionTask;
299
- element?: LocateResultElement | null;
300
- }
301
-
302
- /**
303
- * Generates Playwright test code from recorded events
304
- */
305
- export declare const generatePlaywrightTest: (events: ChromeRecordedEvent[], options?: PlaywrightGenerationOptions) => Promise<string>;
306
-
307
- /**
308
- * Generates Playwright test code from recorded events with streaming support
309
- */
310
- export declare const generatePlaywrightTestStream: (events: ChromeRecordedEvent[], options?: PlaywrightGenerationOptions & StreamingCodeGenerationOptions) => Promise<StreamingAIResponse>;
311
-
312
- /**
313
- * Generates YAML test configuration from recorded events using AI
314
- */
315
- export declare const generateYamlTest: (events: ChromeRecordedEvent[], options?: YamlGenerationOptions) => Promise<string>;
316
-
317
- /**
318
- * Generates YAML test configuration from recorded events using AI with streaming support
319
- */
320
- export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options?: YamlGenerationOptions & StreamingCodeGenerationOptions) => Promise<StreamingAIResponse>;
321
-
322
- declare interface InsightExtractOption {
323
- domIncluded?: boolean | 'visible-only';
324
- screenshotIncluded?: boolean;
325
- returnThought?: boolean;
326
- }
327
-
328
- declare interface LocateOption {
329
- deepThink?: boolean; // only available in vl model
330
- cacheable?: boolean; // user can set this param to false to disable the cache for a single agent api
331
- xpath?: string; // only available in web
332
- pageContext?: UIContext<BaseElement>;
333
- }
334
-
335
- declare type LocateResultElement = {
336
- id: string;
337
- indexId?: number;
338
- center: [number, number];
339
- rect: Rect;
340
- xpaths: string[];
341
- attributes: {
342
- nodeType: NodeType;
343
- [key: string]: string;
344
- };
345
- isOrderSensitive?: boolean;
346
- };
347
-
348
- declare type MidsceneYamlFlowItem =
349
- | MidsceneYamlFlowItemAIAction
350
- | MidsceneYamlFlowItemAIAssert
351
- | MidsceneYamlFlowItemAIQuery
352
- | MidsceneYamlFlowItemAIWaitFor
353
- | MidsceneYamlFlowItemAITap
354
- | MidsceneYamlFlowItemAIRightClick
355
- | MidsceneYamlFlowItemAIHover
356
- | MidsceneYamlFlowItemAIInput
357
- | MidsceneYamlFlowItemAIKeyboardPress
358
- | MidsceneYamlFlowItemAIScroll
359
- | MidsceneYamlFlowItemSleep
360
- | MidsceneYamlFlowItemLogScreenshot;
361
-
362
- declare interface MidsceneYamlFlowItemAIAction {
363
- ai?: string; // this is the shortcut for aiAction
364
- aiAction?: string;
365
- aiActionProgressTips?: string[];
366
- cacheable?: boolean;
367
- }
368
-
369
- declare interface MidsceneYamlFlowItemAIAssert {
370
- aiAssert: string;
371
- errorMessage?: string;
372
- }
373
-
374
- declare interface MidsceneYamlFlowItemAIHover extends LocateOption {
375
- aiHover: TUserPrompt;
376
- }
377
-
378
- declare interface MidsceneYamlFlowItemAIInput extends LocateOption {
379
- aiInput: string; // value to input
380
- locate: TUserPrompt; // where to input
381
- }
382
-
383
- declare interface MidsceneYamlFlowItemAIKeyboardPress extends LocateOption {
384
- aiKeyboardPress: string;
385
- locate?: TUserPrompt; // where to press, optional
386
- }
387
-
388
- declare interface MidsceneYamlFlowItemAIQuery extends InsightExtractOption {
389
- aiQuery: string;
390
- name?: string;
391
- }
392
-
393
- declare interface MidsceneYamlFlowItemAIRightClick extends LocateOption {
394
- aiRightClick: TUserPrompt;
395
- }
396
-
397
- declare interface MidsceneYamlFlowItemAIScroll
398
- extends LocateOption,
399
- ScrollParam {
400
- aiScroll: null;
401
- locate?: TUserPrompt; // which area to scroll, optional
402
- }
403
-
404
- declare interface MidsceneYamlFlowItemAITap extends LocateOption {
405
- aiTap: TUserPrompt;
406
- }
407
-
408
- declare interface MidsceneYamlFlowItemAIWaitFor {
409
- aiWaitFor: string;
410
- timeout?: number;
411
- }
412
-
413
- declare interface MidsceneYamlFlowItemLogScreenshot {
414
- logScreenshot?: string; // optional, the title of the screenshot
415
- content?: string;
416
- }
417
-
418
- declare interface MidsceneYamlFlowItemSleep {
419
- sleep: number;
420
- }
421
-
422
- declare type PageType =
423
- | 'puppeteer'
424
- | 'playwright'
425
- | 'static'
426
- | 'chrome-extension-proxy'
427
- | 'android';
428
-
429
- export declare function plan(userInstruction: string, opts: {
430
- context: UIContext;
431
- pageType: PageType;
432
- actionSpace: DeviceAction[];
433
- callAI?: typeof callAiFn<PlanningAIResponse>;
434
- log?: string;
435
- actionContext?: string;
436
- }): Promise<PlanningAIResponse>;
437
-
438
- declare interface PlanningAction<ParamType = any> {
439
- thought?: string;
440
- type:
441
- | 'Locate'
442
- | 'Tap'
443
- | 'RightClick'
444
- | 'Hover'
445
- | 'Drag'
446
- | 'Input'
447
- | 'KeyboardPress'
448
- | 'Scroll'
449
- | 'Error'
450
- | 'Assert'
451
- | 'AssertWithoutThrow'
452
- | 'Sleep'
453
- | 'Finished'
454
- | 'AndroidBackButton'
455
- | 'AndroidHomeButton'
456
- | 'AndroidRecentAppsButton'
457
- | 'AndroidLongPress'
458
- | 'AndroidPull';
459
- param: ParamType;
460
- locate?: PlanningLocateParam | null;
461
- }
462
-
463
- declare interface PlanningAIResponse {
464
- action?: PlanningAction; // this is the qwen mode
465
- actions?: PlanningAction[];
466
- more_actions_needed_by_instruction: boolean;
467
- log: string;
468
- sleep?: number;
469
- error?: string;
470
- usage?: AIUsageInfo;
471
- rawResponse?: string;
472
- yamlFlow?: MidsceneYamlFlowItem[];
473
- yamlString?: string;
474
- }
475
-
476
- /**
477
- * planning
478
- *
479
- */
480
-
481
- declare interface PlanningLocateParam extends DetailedLocateParam {
482
- id?: string;
483
- bbox?: [number, number, number, number];
484
- }
485
-
486
- declare interface PlaywrightGenerationOptions {
487
- testName?: string;
488
- includeScreenshots?: boolean;
489
- includeTimestamps?: boolean;
490
- maxScreenshots?: number;
491
- description?: string;
492
- viewportSize?: {
493
- width: number;
494
- height: number;
495
- };
496
- waitForNetworkIdle?: boolean;
497
- waitForNetworkIdleTimeout?: number;
498
- }
499
-
500
- declare interface ReferenceImage {
501
- base64: string;
502
- rect?: Rect;
503
- }
504
-
505
- export declare function resizeImageForUiTars(imageBase64: string, size: Size): Promise<string>;
506
-
507
- declare interface ScrollParam {
508
- direction: 'down' | 'up' | 'right' | 'left';
509
- scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
510
- distance?: null | number; // distance in px
511
- }
512
-
513
- declare interface StreamingAIResponse {
514
- /** The final accumulated content */
515
- content: string;
516
- /** Token usage information */
517
- usage?: AIUsageInfo;
518
- /** Whether the response was streamed */
519
- isStreamed: boolean;
520
- }
521
-
522
- declare type StreamingCallback = (chunk: CodeGenerationChunk) => void;
523
-
524
- declare interface StreamingCodeGenerationOptions {
525
- /** Whether to enable streaming output */
526
- stream?: boolean;
527
- /** Callback function to handle streaming chunks */
528
- onChunk?: StreamingCallback;
529
- /** Callback function to handle streaming completion */
530
- onComplete?: (finalCode: string) => void;
531
- /** Callback function to handle streaming errors */
532
- onError?: (error: Error) => void;
533
- }
534
-
535
- export declare function systemPromptToLocateElement(vlMode: ReturnType<typeof vlLocateMode>): string;
536
-
537
- declare type TMultimodalPrompt = {
538
- /**
539
- * Support use image to inspect elements.
540
- * The "images" field is an object that uses image name as key and image url as value.
541
- * The image url can be a local path, a http link , or a base64 string.
542
- */
543
- images?: {
544
- name: string;
545
- url: string;
546
- }[];
547
- /**
548
- * By default, the image url in the "images" filed starts with `https://` or `http://` will be directly sent to the LLM.
549
- * In case the images are not accessible to the LLM (One common case is that image url is internal network only.), you can enable this option.
550
- * Then image will be download and convert to base64 format.
551
- */
552
- convertHttpImage2Base64?: boolean;
553
- };
554
-
555
- declare type TUserPrompt =
556
- | string
557
- | ({
558
- prompt: string;
559
- } & Partial<TMultimodalPrompt>);
560
-
561
- /**
562
- * context
563
- */
564
-
565
- declare abstract class UIContext<ElementType extends BaseElement = BaseElement> {
566
- abstract screenshotBase64: string;
567
-
568
- abstract tree: ElementTreeNode<ElementType>;
569
-
570
- abstract size: Size;
571
- }
572
-
573
- export declare function vlmPlanning(options: {
574
- userInstruction: string;
575
- conversationHistory: ChatCompletionMessageParam[];
576
- size: {
577
- width: number;
578
- height: number;
579
- };
580
- }): Promise<{
581
- actions: PlanningAction<any>[];
582
- actionsFromModel: ReturnType<typeof actionParser>['parsed'];
583
- action_summary: string;
584
- yamlFlow?: MidsceneYamlFlowItem[];
585
- usage?: AIUsageInfo;
586
- rawResponse?: string;
587
- }>;
588
-
589
- declare interface YamlGenerationOptions {
590
- testName?: string;
591
- includeTimestamps?: boolean;
592
- maxScreenshots?: number;
593
- description?: string;
594
- }
595
-
596
- export { }