@midscene/core 0.26.2-beta-20250812091127.0 → 0.26.3-beta-20250813021342.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/es/ai-model.mjs +2502 -0
  2. package/dist/es/ai-model.mjs.map +1 -0
  3. package/dist/es/index.mjs +2362 -0
  4. package/dist/es/index.mjs.map +1 -0
  5. package/dist/es/tree.mjs +2 -0
  6. package/dist/es/utils.mjs +188 -0
  7. package/dist/es/{chunk-WXNDYUNG.js.map → utils.mjs.map} +1 -1
  8. package/dist/lib/ai-model.js +2581 -3
  9. package/dist/lib/ai-model.js.map +1 -0
  10. package/dist/lib/index.js +2375 -493
  11. package/dist/lib/index.js.map +1 -1
  12. package/dist/lib/tree.js +42 -11
  13. package/dist/lib/tree.js.map +1 -1
  14. package/dist/lib/utils.js +257 -29
  15. package/dist/lib/utils.js.map +1 -0
  16. package/dist/types/ai-model.d.ts +505 -99
  17. package/dist/types/index.d.ts +1299 -53
  18. package/dist/types/tree.d.ts +11 -1
  19. package/dist/types/utils.d.ts +47 -33
  20. package/package.json +28 -12
  21. package/dist/es/ai-model.d.ts +0 -99
  22. package/dist/es/ai-model.js +0 -44
  23. package/dist/es/chunk-DDYIQHOA.js +0 -2883
  24. package/dist/es/chunk-DDYIQHOA.js.map +0 -1
  25. package/dist/es/chunk-WXNDYUNG.js +0 -265
  26. package/dist/es/index.d.ts +0 -53
  27. package/dist/es/index.js +0 -570
  28. package/dist/es/index.js.map +0 -1
  29. package/dist/es/llm-planning-4e0c16fe.d.ts +0 -106
  30. package/dist/es/tree.d.ts +0 -1
  31. package/dist/es/tree.js +0 -13
  32. package/dist/es/tree.js.map +0 -1
  33. package/dist/es/types-8a6be57c.d.ts +0 -577
  34. package/dist/es/utils.d.ts +0 -33
  35. package/dist/es/utils.js +0 -30
  36. package/dist/lib/ai-model.d.ts +0 -99
  37. package/dist/lib/chunk-DDYIQHOA.js +0 -2883
  38. package/dist/lib/chunk-DDYIQHOA.js.map +0 -1
  39. package/dist/lib/chunk-WXNDYUNG.js +0 -265
  40. package/dist/lib/chunk-WXNDYUNG.js.map +0 -1
  41. package/dist/lib/index.d.ts +0 -53
  42. package/dist/lib/llm-planning-4e0c16fe.d.ts +0 -106
  43. package/dist/lib/tree.d.ts +0 -1
  44. package/dist/lib/types-8a6be57c.d.ts +0 -577
  45. package/dist/lib/utils.d.ts +0 -33
  46. package/dist/types/llm-planning-4e0c16fe.d.ts +0 -106
  47. package/dist/types/types-8a6be57c.d.ts +0 -577
@@ -1,53 +1,1299 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractParam, h as InsightExtractOption, T as TMultimodalPrompt, A as AIUsageInfo, i as TUserPrompt, j as InsightAssertionResponse, k as AIDescribeElementResponse } from './types-8a6be57c.js';
2
- export { z as AIAssertionResponse, x as AIDataExtractionResponse, v as AIElementCoordinatesResponse, u as AIElementLocatorResponse, w as AIElementResponse, q as AIResponseFormat, y as AISectionLocatorResponse, t as AISingleElementResponse, r as AISingleElementResponseById, s as AISingleElementResponseByPosition, W as AgentAssertOpt, F as AgentDescribeElementAtPointResult, V as AgentWaitForOpt, aa as BaseAgentParserOpt, G as CallAIFn, aE as CodeGenerationChunk, a9 as Color, aG as DeviceAction, K as DumpMeta, Q as ElementById, H as EnsureObject, ad as ExecutionRecorderItem, av as ExecutionTaskAction, au as ExecutionTaskActionApply, ag as ExecutionTaskHitBy, at as ExecutionTaskInsightAssertion, as as ExecutionTaskInsightAssertionApply, ar as ExecutionTaskInsightAssertionParam, ak as ExecutionTaskInsightDumpLog, am as ExecutionTaskInsightLocate, al as ExecutionTaskInsightLocateApply, aj as ExecutionTaskInsightLocateOutput, ai as ExecutionTaskInsightLocateParam, aq as ExecutionTaskInsightQuery, ap as ExecutionTaskInsightQueryApply, ao as ExecutionTaskInsightQueryOutput, an as ExecutionTaskInsightQueryParam, ax as ExecutionTaskLog, aw as ExecutionTaskLogApply, az as ExecutionTaskPlanning, ay as ExecutionTaskPlanningApply, ah as ExecutionTaskReturn, ae as ExecutionTaskType, af as ExecutorContext, b2 as FreeFn, aA as GroupedActionDump, N as InsightDump, O as LiteUISection, p as LocateOption, J as LocateResultElement, C as LocateValidatorResult, B as LocatorValidatorOption, b5 as MidsceneYamlConfig, b6 as MidsceneYamlConfigOutput, o as MidsceneYamlConfigResult, m as MidsceneYamlFlowItem, aN as MidsceneYamlFlowItemAIAction, aS as MidsceneYamlFlowItemAIAsk, aO as MidsceneYamlFlowItemAIAssert, aT as MidsceneYamlFlowItemAIBoolean, aX as MidsceneYamlFlowItemAIHover, aY as MidsceneYamlFlowItemAIInput, aZ as MidsceneYamlFlowItemAIKeyboardPress, aU as MidsceneYamlFlowItemAILocate, aQ as MidsceneYamlFlowItemAINumber, aP as MidsceneYamlFlowItemAIQuery, n as 
MidsceneYamlFlowItemAIRightClick, a_ as MidsceneYamlFlowItemAIScroll, aR as MidsceneYamlFlowItemAIString, aW as MidsceneYamlFlowItemAITap, aV as MidsceneYamlFlowItemAIWaitFor, a$ as MidsceneYamlFlowItemEvaluateJavaScript, b1 as MidsceneYamlFlowItemLogScreenshot, b0 as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aL as MidsceneYamlScriptAndroidEnv, aM as MidsceneYamlScriptEnv, aJ as MidsceneYamlScriptEnvBase, aK as MidsceneYamlScriptWebEnv, l as MidsceneYamlTask, S as OnTaskStartTip, aB as PageType, P as PartialInsightDumpFromSDK, Z as PlanningAIResponse, Y as PlanningAction, a7 as PlanningActionParamAndroidLongPress, a8 as PlanningActionParamAndroidPull, a3 as PlanningActionParamAssert, a5 as PlanningActionParamError, $ as PlanningActionParamHover, a1 as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamRightClick, a2 as PlanningActionParamScroll, a4 as PlanningActionParamSleep, _ as PlanningActionParamTap, a6 as PlanningActionParamWaitFor, X as PlanningLocateParam, ac as PlaywrightParserOpt, ab as PuppeteerParserOpt, aH as ReferenceImage, R as ReportDumpWithAttributes, b4 as ScriptPlayerStatusValue, b3 as ScriptPlayerTaskStatus, aF as StreamingAIResponse, aD as StreamingCallback, aC as StreamingCodeGenerationOptions, aI as scrollParam } from './types-8a6be57c.js';
3
- import { c as callAiFn } from './llm-planning-4e0c16fe.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-4e0c16fe.js';
5
- import { BaseElement, Rect } from '@midscene/shared/types';
6
- export { BaseElement, ElementTreeNode, Point, Rect, Size } from '@midscene/shared/types';
7
- export { getVersion } from './utils.js';
8
- export { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';
9
- import '@midscene/shared/constants';
10
- import 'openai/resources';
11
-
12
- declare class Executor {
13
- name: string;
14
- tasks: ExecutionTask[];
15
- status: 'init' | 'pending' | 'running' | 'completed' | 'error';
16
- onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];
17
- constructor(name: string, options?: ExecutionTaskProgressOptions & {
18
- tasks?: ExecutionTaskApply[];
19
- });
20
- private markTaskAsPending;
21
- append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void>;
22
- flush(): Promise<{
23
- output: any;
24
- thought?: string;
25
- } | undefined>;
26
- isInErrorState(): boolean;
27
- latestErrorTask(): ExecutionTask | null;
28
- dump(): ExecutionDump;
29
- }
30
-
31
- interface LocateOpts {
32
- context?: UIContext<BaseElement>;
33
- callAI?: typeof callAiFn<AIElementResponse>;
34
- }
35
- declare class Insight<ElementType extends BaseElement = BaseElement, ContextType extends UIContext<ElementType> = UIContext<ElementType>> {
36
- contextRetrieverFn: (action: InsightAction) => Promise<ContextType> | ContextType;
37
- aiVendorFn: (...args: Array<any>) => Promise<any>;
38
- onceDumpUpdatedFn?: DumpSubscriber;
39
- taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
40
- constructor(context: ContextType | ((action: InsightAction) => Promise<ContextType> | ContextType), opt?: InsightOptions);
41
- locate(query: DetailedLocateParam, opt?: LocateOpts): Promise<LocateResult>;
42
- extract<T>(dataDemand: InsightExtractParam, opt?: InsightExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<{
43
- data: T;
44
- thought?: string;
45
- usage?: AIUsageInfo;
46
- }>;
47
- assert(assertion: TUserPrompt): Promise<InsightAssertionResponse>;
48
- describe(target: Rect | [number, number], opt?: {
49
- deepThink?: boolean;
50
- }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
51
- }
52
-
53
- export { AIDescribeElementResponse, AIUsageInfo, DetailedLocateParam, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, Executor, Insight, InsightAction, InsightAssertionResponse, InsightExtractOption, InsightExtractParam, InsightOptions, InsightTaskInfo, LocateResult, TMultimodalPrompt, TUserPrompt, UIContext, Insight as default };
1
+ import { BaseElement } from '@midscene/shared/types';
2
+ import type { ChatCompletionMessageParam } from 'openai/resources/index';
3
+ import type { ChatCompletionSystemMessageParam } from 'openai/resources/index';
4
+ import type { ChatCompletionUserMessageParam } from 'openai/resources/index';
5
+ import { ElementTreeNode } from '@midscene/shared/types';
6
+ import { getAIConfig } from '@midscene/shared/env';
7
+ import { MIDSCENE_MODEL_NAME } from '@midscene/shared/env';
8
+ import type { NodeType } from '@midscene/shared/constants';
9
+ import { Point } from '@midscene/shared/types';
10
+ import { Rect } from '@midscene/shared/types';
11
+ import { Size } from '@midscene/shared/types';
12
+
13
+ export declare interface AgentAssertOpt {
14
+ keepRawResponse?: boolean;
15
+ }
16
+
17
+ export declare interface AgentDescribeElementAtPointResult {
18
+ prompt: string;
19
+ deepThink: boolean;
20
+ verifyResult?: LocateValidatorResult;
21
+ }
22
+
23
+ export declare interface AgentWaitForOpt {
24
+ checkIntervalMs?: number;
25
+ timeoutMs?: number;
26
+ }
27
+
28
+ declare enum AIActionType {
29
+ ASSERT = 0,
30
+ INSPECT_ELEMENT = 1,
31
+ EXTRACT_DATA = 2,
32
+ PLAN = 3,
33
+ DESCRIBE_ELEMENT = 4
34
+ }
35
+
36
+ declare enum AIActionType_2 {
37
+ ASSERT = 0,
38
+ INSPECT_ELEMENT = 1,
39
+ EXTRACT_DATA = 2,
40
+ PLAN = 3,
41
+ DESCRIBE_ELEMENT = 4,
42
+ }
43
+
44
+ declare type AIArgs = [
45
+ ChatCompletionSystemMessageParam,
46
+ ...ChatCompletionUserMessageParam[]
47
+ ];
48
+
49
+ declare type AIArgs_2 = [
50
+ ChatCompletionSystemMessageParam,
51
+ ...ChatCompletionUserMessageParam[],
52
+ ];
53
+
54
+ export declare function AiAssert<ElementType extends BaseElement = BaseElement>(options: {
55
+ assertion: TUserPrompt_2;
56
+ context: UIContext_2<ElementType>;
57
+ }): Promise<{
58
+ content: AIAssertionResponse_2;
59
+ usage: AIUsageInfo_2 | undefined;
60
+ }>;
61
+
62
+ export declare interface AIAssertionResponse {
63
+ pass: boolean;
64
+ thought: string;
65
+ }
66
+
67
+ declare interface AIAssertionResponse_2 {
68
+ pass: boolean;
69
+ thought: string;
70
+ }
71
+
72
+ export declare interface AIDataExtractionResponse<DataDemand> {
73
+ data: DataDemand;
74
+ errors?: string[];
75
+ thought?: string;
76
+ }
77
+
78
+ export declare interface AIDescribeElementResponse {
79
+ description: string;
80
+ error?: string;
81
+ }
82
+
83
+ declare interface AIDescribeElementResponse_2 {
84
+ description: string;
85
+ error?: string;
86
+ }
87
+
88
+ export declare interface AIElementCoordinatesResponse {
89
+ bbox: [number, number, number, number];
90
+ isOrderSensitive?: boolean;
91
+ errors?: string[];
92
+ }
93
+
94
+ declare interface AIElementCoordinatesResponse_2 {
95
+ bbox: [number, number, number, number];
96
+ isOrderSensitive?: boolean;
97
+ errors?: string[];
98
+ }
99
+
100
+ export declare interface AIElementLocatorResponse {
101
+ elements: {
102
+ id: string;
103
+ reason?: string;
104
+ text?: string;
105
+ xpaths?: string[];
106
+ }[];
107
+ bbox?: [number, number, number, number];
108
+ isOrderSensitive?: boolean;
109
+ errors?: string[];
110
+ }
111
+
112
+ declare interface AIElementLocatorResponse_2 {
113
+ elements: {
114
+ id: string;
115
+ reason?: string;
116
+ text?: string;
117
+ xpaths?: string[];
118
+ }[];
119
+ bbox?: [number, number, number, number];
120
+ isOrderSensitive?: boolean;
121
+ errors?: string[];
122
+ }
123
+
124
+ export declare type AIElementResponse = AIElementLocatorResponse | AIElementCoordinatesResponse;
125
+
126
+ declare type AIElementResponse_2 =
127
+ | AIElementLocatorResponse_2
128
+ | AIElementCoordinatesResponse_2;
129
+
130
+ export declare function AiLocateElement<ElementType extends BaseElement = BaseElement>(options: {
131
+ context: UIContext_2<ElementType>;
132
+ targetElementDescription: TUserPrompt_2;
133
+ referenceImage?: ReferenceImage_2;
134
+ callAI?: typeof callAiFn<AIElementResponse_2 | [number, number]>;
135
+ searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
136
+ }): Promise<{
137
+ parseResult: AIElementLocatorResponse_2;
138
+ rect?: Rect;
139
+ rawResponse: string;
140
+ elementById: ElementById_2;
141
+ usage?: AIUsageInfo_2;
142
+ isOrderSensitive?: boolean;
143
+ }>;
144
+
145
+ declare function AiLocateSection(options: {
146
+ context: UIContext_2<BaseElement>;
147
+ sectionDescription: TUserPrompt_2;
148
+ callAI?: typeof callAiFn<AISectionLocatorResponse_2>;
149
+ }): Promise<{
150
+ rect?: Rect;
151
+ imageBase64?: string;
152
+ error?: string;
153
+ rawResponse: string;
154
+ usage?: AIUsageInfo_2;
155
+ }>;
156
+
157
+ /**
158
+ * openai
159
+ *
160
+ */
161
+ export declare enum AIResponseFormat {
162
+ JSON = "json_object",
163
+ TEXT = "text"
164
+ }
165
+
166
+ export declare interface AISectionLocatorResponse {
167
+ bbox: [number, number, number, number];
168
+ references_bbox?: [number, number, number, number][];
169
+ error?: string;
170
+ }
171
+
172
+ declare interface AISectionLocatorResponse_2 {
173
+ bbox: [number, number, number, number];
174
+ references_bbox?: [number, number, number, number][];
175
+ error?: string;
176
+ }
177
+
178
+ export declare type AISingleElementResponse = AISingleElementResponseById;
179
+
180
+ export declare type AISingleElementResponseById = {
181
+ id: string;
182
+ reason?: string;
183
+ text?: string;
184
+ xpaths?: string[];
185
+ };
186
+
187
+ export declare type AISingleElementResponseByPosition = {
188
+ position?: {
189
+ x: number;
190
+ y: number;
191
+ };
192
+ bbox?: [number, number, number, number];
193
+ reason: string;
194
+ text: string;
195
+ };
196
+
197
+ export declare type AIUsageInfo = Record<string, any> & {
198
+ prompt_tokens: number | undefined;
199
+ completion_tokens: number | undefined;
200
+ total_tokens: number | undefined;
201
+ time_cost: number | undefined;
202
+ };
203
+
204
+ declare type AIUsageInfo_2 = Record<string, any> & {
205
+ prompt_tokens: number | undefined;
206
+ completion_tokens: number | undefined;
207
+ total_tokens: number | undefined;
208
+ time_cost: number | undefined;
209
+ };
210
+
211
+ export declare interface BaseAgentParserOpt {
212
+ selector?: string;
213
+ ignoreMarker?: boolean;
214
+ }
215
+
216
+ export { BaseElement }
217
+
218
+ /**
219
+ * insight
220
+ */
221
+ export declare type CallAIFn = <T>(messages: ChatCompletionMessageParam[]) => Promise<T>;
222
+
223
+ declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
224
+ content: T;
225
+ usage?: AIUsageInfo_2;
226
+ }>;
227
+
228
+ /**
229
+ * insight
230
+ */
231
+
232
+ declare type CallAIFn_2 = <T>(
233
+ messages: ChatCompletionMessageParam[],
234
+ ) => Promise<T>;
235
+
236
+ declare async function callAiFn_2<T>(
237
+ msgs: AIArgs_2,
238
+ AIActionTypeValue: AIActionType_2,
239
+ ): Promise<{ content: T; usage?: AIUsageInfo_2 }> {
240
+ const jsonObject = await callToGetJSONObject<T>(msgs, AIActionTypeValue);
241
+
242
+ return {
243
+ content: jsonObject.content,
244
+ usage: jsonObject.usage,
245
+ };
246
+ }
247
+
248
+ export declare interface CodeGenerationChunk {
249
+ /** The incremental content chunk */
250
+ content: string;
251
+ /** The reasoning content */
252
+ reasoning_content: string;
253
+ /** The accumulated content so far */
254
+ accumulated: string;
255
+ /** Whether this is the final chunk */
256
+ isComplete: boolean;
257
+ /** Token usage information if available */
258
+ usage?: AIUsageInfo;
259
+ }
260
+
261
+ /**
262
+ * misc
263
+ */
264
+ export declare interface Color {
265
+ name: string;
266
+ hex: string;
267
+ }
268
+
269
+ export declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext_2<ElementType>, 'describer'>, opt?: {
270
+ truncateTextLength?: number;
271
+ filterNonTextContent?: boolean;
272
+ domIncluded?: boolean | 'visible-only';
273
+ visibleOnly?: boolean;
274
+ }): Promise<{
275
+ description: string;
276
+ elementById(idOrIndexId: string): ElementType;
277
+ elementByPosition(position: {
278
+ x: number;
279
+ y: number;
280
+ }, size: {
281
+ width: number;
282
+ height: number;
283
+ }): BaseElement | undefined;
284
+ insertElementByPosition(position: {
285
+ x: number;
286
+ y: number;
287
+ }): ElementType;
288
+ size: {
289
+ width: number;
290
+ height: number;
291
+ };
292
+ }>;
293
+
294
+ export declare interface DetailedLocateParam extends LocateOption {
295
+ prompt: TUserPrompt;
296
+ referenceImage?: ReferenceImage;
297
+ }
298
+
299
+ declare interface DetailedLocateParam_2 extends LocateOption_2 {
300
+ prompt: TUserPrompt_2;
301
+ referenceImage?: ReferenceImage_2;
302
+ }
303
+
304
+ export declare interface DeviceAction<ParamType = any> {
305
+ name: string;
306
+ description?: string;
307
+ paramSchema?: string;
308
+ paramDescription?: string;
309
+ location?: 'required' | 'optional' | false;
310
+ whatToLocate?: string;
311
+ call: (param: ParamType) => Promise<void> | void;
312
+ }
313
+
314
+ declare interface DeviceAction_2<ParamType = any> {
315
+ name: string;
316
+ description?: string;
317
+ paramSchema?: string;
318
+ paramDescription?: string;
319
+ location?: 'required' | 'optional' | false;
320
+ whatToLocate?: string; // what to locate if location is required or optional
321
+ call: (param: ParamType) => Promise<void> | void;
322
+ }
323
+
324
+ export declare interface DumpMeta {
325
+ sdkVersion: string;
326
+ logTime: number;
327
+ model_name: string;
328
+ model_description?: string;
329
+ }
330
+
331
+ declare interface DumpMeta_2 {
332
+ sdkVersion: string;
333
+ logTime: number;
334
+ model_name: string;
335
+ model_description?: string;
336
+ }
337
+
338
+ export declare type DumpSubscriber = (dump: InsightDump) => Promise<void> | void;
339
+
340
+ declare type DumpSubscriber_2 = (dump: InsightDump_2) => Promise<void> | void;
341
+
342
+ export declare type ElementById = (id: string) => BaseElement | null;
343
+
344
+ declare type ElementById_2 = (id: string) => BaseElement | null;
345
+
346
+ export { ElementTreeNode }
347
+
348
+ export declare type EnsureObject<T> = {
349
+ [K in keyof T]: any;
350
+ };
351
+
352
+ export declare interface ExecutionDump extends DumpMeta {
353
+ name: string;
354
+ description?: string;
355
+ tasks: ExecutionTask[];
356
+ }
357
+
358
+ declare interface ExecutionDump_2 extends DumpMeta_2 {
359
+ name: string;
360
+ description?: string;
361
+ tasks: ExecutionTask_2[];
362
+ }
363
+
364
+ export declare interface ExecutionRecorderItem {
365
+ type: 'screenshot';
366
+ ts: number;
367
+ screenshot?: string;
368
+ timing?: string;
369
+ }
370
+
371
+ declare interface ExecutionRecorderItem_2 {
372
+ type: 'screenshot';
373
+ ts: number;
374
+ screenshot?: string;
375
+ timing?: string;
376
+ }
377
+
378
+ export declare type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
379
+ status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
380
+ error?: Error;
381
+ errorMessage?: string;
382
+ errorStack?: string;
383
+ timing?: {
384
+ start: number;
385
+ end?: number;
386
+ cost?: number;
387
+ };
388
+ usage?: AIUsageInfo;
389
+ };
390
+
391
+ declare type ExecutionTask_2<
392
+ E extends ExecutionTaskApply_2<any, any, any> = ExecutionTaskApply_2<
393
+ any,
394
+ any,
395
+ any
396
+ >,
397
+ > = E &
398
+ ExecutionTaskReturn_2<
399
+ E extends ExecutionTaskApply_2<any, any, infer TaskOutput, any>
400
+ ? TaskOutput
401
+ : unknown,
402
+ E extends ExecutionTaskApply_2<any, any, any, infer TaskLog>
403
+ ? TaskLog
404
+ : unknown
405
+ > & {
406
+ status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
407
+ error?: Error;
408
+ errorMessage?: string;
409
+ errorStack?: string;
410
+ timing?: {
411
+ start: number;
412
+ end?: number;
413
+ cost?: number;
414
+ };
415
+ usage?: AIUsageInfo_2;
416
+ };
417
+
418
+ export declare type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;
419
+
420
+ export declare type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<'Action', ActionParam, void, void>;
421
+
422
+ export declare interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> {
423
+ type: Type;
424
+ subType?: string;
425
+ param?: TaskParam;
426
+ thought?: string;
427
+ locate?: PlanningLocateParam | null;
428
+ pageContext?: UIContext;
429
+ executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void> | undefined | void;
430
+ }
431
+
432
+ declare interface ExecutionTaskApply_2<
433
+ Type extends ExecutionTaskType_2 = any,
434
+ TaskParam = any,
435
+ TaskOutput = any,
436
+ TaskLog = any,
437
+ > {
438
+ type: Type;
439
+ subType?: string;
440
+ param?: TaskParam;
441
+ thought?: string;
442
+ locate?: PlanningLocateParam_2 | null;
443
+ pageContext?: UIContext_2;
444
+ executor: (
445
+ param: TaskParam,
446
+ context: ExecutorContext_2,
447
+ ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>
448
+ | Promise<ExecutionTaskReturn_2<TaskOutput, TaskLog> | undefined | void>
449
+ | undefined
450
+ | void;
451
+ }
452
+
453
+ export declare interface ExecutionTaskHitBy {
454
+ from: string;
455
+ context: Record<string, any>;
456
+ }
457
+
458
+ declare interface ExecutionTaskHitBy_2 {
459
+ from: string;
460
+ context: Record<string, any>;
461
+ }
462
+
463
+ export declare type ExecutionTaskInsightAssertion = ExecutionTask<ExecutionTaskInsightAssertionApply>;
464
+
465
+ export declare type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightAssertionParam, InsightAssertionResponse, ExecutionTaskInsightDumpLog>;
466
+
467
+ export declare interface ExecutionTaskInsightAssertionParam {
468
+ assertion: string;
469
+ }
470
+
471
+ export declare interface ExecutionTaskInsightDumpLog {
472
+ dump?: InsightDump;
473
+ }
474
+
475
+ export declare type ExecutionTaskInsightLocate = ExecutionTask<ExecutionTaskInsightLocateApply>;
476
+
477
+ export declare type ExecutionTaskInsightLocateApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightLocateParam, ExecutionTaskInsightLocateOutput, ExecutionTaskInsightDumpLog>;
478
+
479
+ export declare interface ExecutionTaskInsightLocateOutput {
480
+ element: LocateResultElement | null;
481
+ }
482
+
483
+ export declare type ExecutionTaskInsightLocateParam = PlanningLocateParam;
484
+
485
+ export declare type ExecutionTaskInsightQuery = ExecutionTask<ExecutionTaskInsightQueryApply>;
486
+
487
+ export declare type ExecutionTaskInsightQueryApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightQueryParam, any, ExecutionTaskInsightDumpLog>;
488
+
489
+ export declare interface ExecutionTaskInsightQueryOutput {
490
+ data: any;
491
+ }
492
+
493
+ export declare interface ExecutionTaskInsightQueryParam {
494
+ dataDemand: InsightExtractParam;
495
+ }
496
+
497
+ export declare type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;
498
+
499
+ export declare type ExecutionTaskLogApply<LogParam = {
500
+ content: string;
501
+ }> = ExecutionTaskApply<'Log', LogParam, void, void>;
502
+
503
+ export declare type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;
504
+
505
+ export declare type ExecutionTaskPlanningApply = ExecutionTaskApply<'Planning', {
506
+ userInstruction: string;
507
+ log?: string;
508
+ }, PlanningAIResponse>;
509
+
510
+ export declare interface ExecutionTaskProgressOptions {
511
+ onTaskStart?: (task: ExecutionTask) => Promise<void> | void;
512
+ }
513
+
514
+ declare interface ExecutionTaskProgressOptions_2 {
515
+ onTaskStart?: (task: ExecutionTask_2) => Promise<void> | void;
516
+ }
517
+
518
+ export declare interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
519
+ output?: TaskOutput;
520
+ log?: TaskLog;
521
+ recorder?: ExecutionRecorderItem[];
522
+ hitBy?: ExecutionTaskHitBy;
523
+ }
524
+
525
+ declare interface ExecutionTaskReturn_2<TaskOutput = unknown, TaskLog = unknown> {
526
+ output?: TaskOutput;
527
+ log?: TaskLog;
528
+ recorder?: ExecutionRecorderItem_2[];
529
+ hitBy?: ExecutionTaskHitBy_2;
530
+ }
531
+
532
+ export declare type ExecutionTaskType = 'Planning' | 'Insight' | 'Action' | 'Assertion' | 'Log';
533
+
534
+ declare type ExecutionTaskType_2 =
535
+ | 'Planning'
536
+ | 'Insight'
537
+ | 'Action'
538
+ | 'Assertion'
539
+ | 'Log';
540
+
541
+ export declare class Executor {
542
+ name: string;
543
+ tasks: ExecutionTask_2[];
544
+ status: 'init' | 'pending' | 'running' | 'completed' | 'error';
545
+ onTaskStart?: ExecutionTaskProgressOptions_2['onTaskStart'];
546
+ constructor(name: string, options?: ExecutionTaskProgressOptions_2 & {
547
+ tasks?: ExecutionTaskApply_2[];
548
+ });
549
+ private markTaskAsPending;
550
+ append(task: ExecutionTaskApply_2[] | ExecutionTaskApply_2): Promise<void>;
551
+ flush(): Promise<{
552
+ output: any;
553
+ thought?: string;
554
+ } | undefined>;
555
+ isInErrorState(): boolean;
556
+ latestErrorTask(): ExecutionTask_2 | null;
557
+ dump(): ExecutionDump_2;
558
+ }
559
+
560
+ export declare interface ExecutorContext {
561
+ task: ExecutionTask;
562
+ element?: LocateResultElement | null;
563
+ }
564
+
565
+ declare interface ExecutorContext_2 {
566
+ task: ExecutionTask_2;
567
+ element?: LocateResultElement_2 | null;
568
+ }
569
+
570
+ export declare interface FreeFn {
571
+ name: string;
572
+ fn: () => void;
573
+ }
574
+
575
+ export { getAIConfig }
576
+
577
+ export declare function getVersion(): string;
578
+
579
+ export declare interface GroupedActionDump {
580
+ groupName: string;
581
+ groupDescription?: string;
582
+ executions: ExecutionDump[];
583
+ }
584
+
585
+ declare class Insight<ElementType extends BaseElement = BaseElement, ContextType extends UIContext_2<ElementType> = UIContext_2<ElementType>> {
586
+ contextRetrieverFn: (action: InsightAction_2) => Promise<ContextType> | ContextType;
587
+ aiVendorFn: (...args: Array<any>) => Promise<any>;
588
+ onceDumpUpdatedFn?: DumpSubscriber_2;
589
+ taskInfo?: Omit<InsightTaskInfo_2, 'durationMs'>;
590
+ constructor(context: ContextType | ((action: InsightAction_2) => Promise<ContextType> | ContextType), opt?: InsightOptions_2);
591
+ locate(query: DetailedLocateParam_2, opt?: LocateOpts): Promise<LocateResult_2>;
592
+ extract<T>(dataDemand: InsightExtractParam_2, opt?: InsightExtractOption_2, multimodalPrompt?: TMultimodalPrompt_2): Promise<{
593
+ data: T;
594
+ thought?: string;
595
+ usage?: AIUsageInfo_2;
596
+ }>;
597
+ assert(assertion: TUserPrompt_2): Promise<InsightAssertionResponse_2>;
598
+ describe(target: Rect | [number, number], opt?: {
599
+ deepThink?: boolean;
600
+ }): Promise<Pick<AIDescribeElementResponse_2, 'description'>>;
601
+ }
602
+ export { Insight }
603
+ export default Insight;
604
+
605
+ export declare type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';
606
+
607
+ declare type InsightAction_2 = 'locate' | 'extract' | 'assert' | 'describe';
608
+
609
+ export declare type InsightAssertionResponse = AIAssertionResponse & {
610
+ usage?: AIUsageInfo;
611
+ };
612
+
613
+ declare type InsightAssertionResponse_2 = AIAssertionResponse_2 & {
614
+ usage?: AIUsageInfo_2;
615
+ };
616
+
617
+ export declare interface InsightDump extends DumpMeta {
618
+ type: 'locate' | 'extract' | 'assert';
619
+ logId: string;
620
+ userQuery: {
621
+ element?: TUserPrompt;
622
+ dataDemand?: InsightExtractParam;
623
+ assertion?: TUserPrompt;
624
+ };
625
+ matchedElement: BaseElement[];
626
+ matchedRect?: Rect;
627
+ deepThink?: boolean;
628
+ data: any;
629
+ assertionPass?: boolean;
630
+ assertionThought?: string;
631
+ taskInfo: InsightTaskInfo;
632
+ error?: string;
633
+ output?: any;
634
+ }
635
+
636
+ declare interface InsightDump_2 extends DumpMeta_2 {
637
+ type: 'locate' | 'extract' | 'assert';
638
+ logId: string;
639
+ userQuery: {
640
+ element?: TUserPrompt_2;
641
+ dataDemand?: InsightExtractParam_2;
642
+ assertion?: TUserPrompt_2;
643
+ };
644
+ matchedElement: BaseElement[];
645
+ matchedRect?: Rect;
646
+ deepThink?: boolean;
647
+ data: any;
648
+ assertionPass?: boolean;
649
+ assertionThought?: string;
650
+ taskInfo: InsightTaskInfo_2;
651
+ error?: string;
652
+ output?: any;
653
+ }
654
+
655
+ export declare interface InsightExtractOption {
656
+ domIncluded?: boolean | 'visible-only';
657
+ screenshotIncluded?: boolean;
658
+ returnThought?: boolean;
659
+ }
660
+
661
+ declare interface InsightExtractOption_2 {
662
+ domIncluded?: boolean | 'visible-only';
663
+ screenshotIncluded?: boolean;
664
+ returnThought?: boolean;
665
+ }
666
+
667
+ export declare type InsightExtractParam = string | Record<string, string>;
668
+
669
+ declare type InsightExtractParam_2 = string | Record<string, string>;
670
+
671
+ export declare interface InsightOptions {
672
+ taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
673
+ aiVendorFn?: CallAIFn;
674
+ }
675
+
676
+ declare interface InsightOptions_2 {
677
+ taskInfo?: Omit<InsightTaskInfo_2, 'durationMs'>;
678
+ aiVendorFn?: CallAIFn_2;
679
+ }
680
+
681
+ export declare interface InsightTaskInfo {
682
+ durationMs: number;
683
+ formatResponse?: string;
684
+ rawResponse?: string;
685
+ usage?: AIUsageInfo;
686
+ searchArea?: Rect;
687
+ searchAreaRawResponse?: string;
688
+ searchAreaUsage?: AIUsageInfo;
689
+ }
690
+
691
+ declare interface InsightTaskInfo_2 {
692
+ durationMs: number;
693
+ formatResponse?: string;
694
+ rawResponse?: string;
695
+ usage?: AIUsageInfo_2;
696
+ searchArea?: Rect;
697
+ searchAreaRawResponse?: string;
698
+ searchAreaUsage?: AIUsageInfo_2;
699
+ }
700
+
701
+ export declare interface LiteUISection {
702
+ name: string;
703
+ description: string;
704
+ sectionCharacteristics: string;
705
+ textIds: string[];
706
+ }
707
+
708
+ export declare interface LocateOption {
709
+ deepThink?: boolean;
710
+ cacheable?: boolean;
711
+ xpath?: string;
712
+ pageContext?: UIContext<BaseElement>;
713
+ }
714
+
715
+ declare interface LocateOption_2 {
716
+ deepThink?: boolean; // only available in vl model
717
+ cacheable?: boolean; // user can set this param to false to disable the cache for a single agent api
718
+ xpath?: string; // only available in web
719
+ pageContext?: UIContext_2<BaseElement>;
720
+ }
721
+
722
+ declare interface LocateOpts {
723
+ context?: UIContext_2<BaseElement>;
724
+ callAI?: typeof callAiFn_2<AIElementResponse_2>;
725
+ }
726
+
727
+ export declare interface LocateResult {
728
+ element: LocateResultElement | null;
729
+ rect?: Rect;
730
+ }
731
+
732
+ declare interface LocateResult_2 {
733
+ element: LocateResultElement_2 | null;
734
+ rect?: Rect;
735
+ }
736
+
737
+ export declare type LocateResultElement = {
738
+ id: string;
739
+ indexId?: number;
740
+ center: [number, number];
741
+ rect: Rect;
742
+ xpaths: string[];
743
+ attributes: {
744
+ nodeType: NodeType;
745
+ [key: string]: string;
746
+ };
747
+ isOrderSensitive?: boolean;
748
+ };
749
+
750
+ declare type LocateResultElement_2 = {
751
+ id: string;
752
+ indexId?: number;
753
+ center: [number, number];
754
+ rect: Rect;
755
+ xpaths: string[];
756
+ attributes: {
757
+ nodeType: NodeType;
758
+ [key: string]: string;
759
+ };
760
+ isOrderSensitive?: boolean;
761
+ };
762
+
763
+ export declare interface LocateValidatorResult {
764
+ pass: boolean;
765
+ rect: Rect;
766
+ center: [number, number];
767
+ centerDistance?: number;
768
+ }
769
+
770
+ export declare interface LocatorValidatorOption {
771
+ centerDistanceThreshold?: number;
772
+ }
773
+
774
+ export { MIDSCENE_MODEL_NAME }
775
+
776
+ export declare interface MidsceneYamlConfig {
777
+ concurrent?: number;
778
+ continueOnError?: boolean;
779
+ summary?: string;
780
+ shareBrowserContext?: boolean;
781
+ web?: MidsceneYamlScriptWebEnv;
782
+ android?: MidsceneYamlScriptAndroidEnv;
783
+ files: string[];
784
+ headed?: boolean;
785
+ keepWindow?: boolean;
786
+ dotenvOverride?: boolean;
787
+ dotenvDebug?: boolean;
788
+ }
789
+
790
+ export declare interface MidsceneYamlConfigOutput {
791
+ format?: 'json';
792
+ path?: string;
793
+ }
794
+
795
+ export declare interface MidsceneYamlConfigResult {
796
+ file: string;
797
+ success: boolean;
798
+ executed: boolean;
799
+ output?: string | null;
800
+ report?: string | null;
801
+ error?: string;
802
+ duration?: number;
803
+ }
804
+
805
+ export declare type MidsceneYamlFlowItem = MidsceneYamlFlowItemAIAction | MidsceneYamlFlowItemAIAssert | MidsceneYamlFlowItemAIQuery | MidsceneYamlFlowItemAIWaitFor | MidsceneYamlFlowItemAITap | MidsceneYamlFlowItemAIRightClick | MidsceneYamlFlowItemAIHover | MidsceneYamlFlowItemAIInput | MidsceneYamlFlowItemAIKeyboardPress | MidsceneYamlFlowItemAIScroll | MidsceneYamlFlowItemSleep | MidsceneYamlFlowItemLogScreenshot;
806
+
807
+ declare type MidsceneYamlFlowItem_2 =
808
+ | MidsceneYamlFlowItemAIAction_2
809
+ | MidsceneYamlFlowItemAIAssert_2
810
+ | MidsceneYamlFlowItemAIQuery_2
811
+ | MidsceneYamlFlowItemAIWaitFor_2
812
+ | MidsceneYamlFlowItemAITap_2
813
+ | MidsceneYamlFlowItemAIRightClick_2
814
+ | MidsceneYamlFlowItemAIHover_2
815
+ | MidsceneYamlFlowItemAIInput_2
816
+ | MidsceneYamlFlowItemAIKeyboardPress_2
817
+ | MidsceneYamlFlowItemAIScroll_2
818
+ | MidsceneYamlFlowItemSleep_2
819
+ | MidsceneYamlFlowItemLogScreenshot_2;
820
+
821
+ export declare interface MidsceneYamlFlowItemAIAction {
822
+ ai?: string; // this is the shortcut for aiAction
823
+ aiAction?: string;
824
+ aiActionProgressTips?: string[];
825
+ cacheable?: boolean;
826
+ }
827
+
828
+ declare interface MidsceneYamlFlowItemAIAction_2 {
829
+ ai?: string; // this is the shortcut for aiAction
830
+ aiAction?: string;
831
+ aiActionProgressTips?: string[];
832
+ cacheable?: boolean;
833
+ }
834
+
835
+ export declare interface MidsceneYamlFlowItemAIAsk extends InsightExtractOption {
836
+ aiAsk: string;
837
+ name?: string;
838
+ }
839
+
840
+ export declare interface MidsceneYamlFlowItemAIAssert {
841
+ aiAssert: string;
842
+ errorMessage?: string;
843
+ }
844
+
845
+ declare interface MidsceneYamlFlowItemAIAssert_2 {
846
+ aiAssert: string;
847
+ errorMessage?: string;
848
+ }
849
+
850
+ export declare interface MidsceneYamlFlowItemAIBoolean extends InsightExtractOption {
851
+ aiBoolean: string;
852
+ name?: string;
853
+ }
854
+
855
+ export declare interface MidsceneYamlFlowItemAIHover extends LocateOption {
856
+ aiHover: TUserPrompt;
857
+ }
858
+
859
+ declare interface MidsceneYamlFlowItemAIHover_2 extends LocateOption_2 {
860
+ aiHover: TUserPrompt_2;
861
+ }
862
+
863
+ export declare interface MidsceneYamlFlowItemAIInput extends LocateOption {
864
+ aiInput: string; // value to input
865
+ locate: TUserPrompt; // where to input
866
+ }
867
+
868
+ declare interface MidsceneYamlFlowItemAIInput_2 extends LocateOption_2 {
869
+ aiInput: string; // value to input
870
+ locate: TUserPrompt_2; // where to input
871
+ }
872
+
873
+ export declare interface MidsceneYamlFlowItemAIKeyboardPress extends LocateOption {
874
+ aiKeyboardPress: string;
875
+ locate?: TUserPrompt; // where to press, optional
876
+ }
877
+
878
+ declare interface MidsceneYamlFlowItemAIKeyboardPress_2 extends LocateOption_2 {
879
+ aiKeyboardPress: string;
880
+ locate?: TUserPrompt_2; // where to press, optional
881
+ }
882
+
883
+ export declare interface MidsceneYamlFlowItemAILocate extends LocateOption {
884
+ aiLocate: string;
885
+ name?: string;
886
+ }
887
+
888
+ export declare interface MidsceneYamlFlowItemAINumber extends InsightExtractOption {
889
+ aiNumber: string;
890
+ name?: string;
891
+ }
892
+
893
+ export declare interface MidsceneYamlFlowItemAIQuery extends InsightExtractOption {
894
+ aiQuery: string;
895
+ name?: string;
896
+ }
897
+
898
+ declare interface MidsceneYamlFlowItemAIQuery_2 extends InsightExtractOption_2 {
899
+ aiQuery: string;
900
+ name?: string;
901
+ }
902
+
903
+ export declare interface MidsceneYamlFlowItemAIRightClick extends LocateOption {
904
+ aiRightClick: TUserPrompt;
905
+ }
906
+
907
+ declare interface MidsceneYamlFlowItemAIRightClick_2 extends LocateOption_2 {
908
+ aiRightClick: TUserPrompt_2;
909
+ }
910
+
911
+ export declare interface MidsceneYamlFlowItemAIScroll extends LocateOption, PlanningActionParamScroll {
912
+ aiScroll: null;
913
+ locate?: TUserPrompt; // which area to scroll, optional
914
+ }
915
+
916
+ declare interface MidsceneYamlFlowItemAIScroll_2
917
+ extends LocateOption_2,
918
+ PlanningActionParamScroll_2 {
919
+ aiScroll: null;
920
+ locate?: TUserPrompt_2; // which area to scroll, optional
921
+ }
922
+
923
+ export declare interface MidsceneYamlFlowItemAIString extends InsightExtractOption {
924
+ aiString: string;
925
+ name?: string;
926
+ }
927
+
928
+ export declare interface MidsceneYamlFlowItemAITap extends LocateOption {
929
+ aiTap: TUserPrompt;
930
+ }
931
+
932
+ declare interface MidsceneYamlFlowItemAITap_2 extends LocateOption_2 {
933
+ aiTap: TUserPrompt_2;
934
+ }
935
+
936
+ export declare interface MidsceneYamlFlowItemAIWaitFor {
937
+ aiWaitFor: string;
938
+ timeout?: number;
939
+ }
940
+
941
+ declare interface MidsceneYamlFlowItemAIWaitFor_2 {
942
+ aiWaitFor: string;
943
+ timeout?: number;
944
+ }
945
+
946
+ export declare interface MidsceneYamlFlowItemEvaluateJavaScript {
947
+ javascript: string;
948
+ name?: string;
949
+ }
950
+
951
+ export declare interface MidsceneYamlFlowItemLogScreenshot {
952
+ logScreenshot?: string; // optional, the title of the screenshot
953
+ content?: string;
954
+ }
955
+
956
+ declare interface MidsceneYamlFlowItemLogScreenshot_2 {
957
+ logScreenshot?: string; // optional, the title of the screenshot
958
+ content?: string;
959
+ }
960
+
961
+ export declare interface MidsceneYamlFlowItemSleep {
962
+ sleep: number;
963
+ }
964
+
965
+ declare interface MidsceneYamlFlowItemSleep_2 {
966
+ sleep: number;
967
+ }
968
+
969
+ export declare interface MidsceneYamlScript {
970
+ target?: MidsceneYamlScriptWebEnv;
971
+ web?: MidsceneYamlScriptWebEnv;
972
+ android?: MidsceneYamlScriptAndroidEnv;
973
+ tasks: MidsceneYamlTask[];
974
+ }
975
+
976
+ export declare interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptEnvBase {
977
+ deviceId?: string;
978
+ launch?: string;
979
+ }
980
+
981
+ export declare type MidsceneYamlScriptEnv = MidsceneYamlScriptWebEnv | MidsceneYamlScriptAndroidEnv;
982
+
983
+ export declare interface MidsceneYamlScriptEnvBase {
984
+ output?: string;
985
+ unstableLogContent?: boolean | string;
986
+ aiActionContext?: string;
987
+ }
988
+
989
+ export declare interface MidsceneYamlScriptWebEnv extends MidsceneYamlScriptEnvBase {
990
+ serve?: string;
991
+ url: string;
992
+ userAgent?: string;
993
+ acceptInsecureCerts?: boolean;
994
+ viewportWidth?: number;
995
+ viewportHeight?: number;
996
+ viewportScale?: number;
997
+ waitForNetworkIdle?: {
998
+ timeout?: number;
999
+ continueOnNetworkIdleError?: boolean;
1000
+ };
1001
+ cookie?: string;
1002
+ forceSameTabNavigation?: boolean;
1003
+ bridgeMode?: false | 'newTabWithUrl' | 'currentTab';
1004
+ closeNewTabsAfterDisconnect?: boolean;
1005
+ }
1006
+
1007
+ export declare interface MidsceneYamlTask {
1008
+ name: string;
1009
+ flow: MidsceneYamlFlowItem[];
1010
+ continueOnError?: boolean;
1011
+ }
1012
+
1013
+ /**
1014
+ * agent
1015
+ */
1016
+ export declare type OnTaskStartTip = (tip: string) => Promise<void> | void;
1017
+
1018
+ export declare type PageType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android';
1019
+
1020
+ declare type PageType_2 =
1021
+ | 'puppeteer'
1022
+ | 'playwright'
1023
+ | 'static'
1024
+ | 'chrome-extension-proxy'
1025
+ | 'android';
1026
+
1027
+ export declare type PartialInsightDumpFromSDK = Omit<InsightDump, 'sdkVersion' | 'logTime' | 'logId' | 'model_name'>;
1028
+
1029
+ export declare function plan(userInstruction: string, opts: {
1030
+ context: UIContext_2;
1031
+ pageType: PageType_2;
1032
+ actionSpace: DeviceAction_2[];
1033
+ callAI?: typeof callAiFn<PlanningAIResponse_2>;
1034
+ log?: string;
1035
+ actionContext?: string;
1036
+ }): Promise<PlanningAIResponse_2>;
1037
+
1038
+ export declare interface PlanningAction<ParamType = any> {
1039
+ thought?: string;
1040
+ type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton' | 'AndroidLongPress' | 'AndroidPull';
1041
+ param: ParamType;
1042
+ locate?: PlanningLocateParam | null;
1043
+ }
1044
+
1045
+ declare interface PlanningAction_2<ParamType = any> {
1046
+ thought?: string;
1047
+ type:
1048
+ | 'Locate'
1049
+ | 'Tap'
1050
+ | 'RightClick'
1051
+ | 'Hover'
1052
+ | 'Drag'
1053
+ | 'Input'
1054
+ | 'KeyboardPress'
1055
+ | 'Scroll'
1056
+ | 'Error'
1057
+ | 'Assert'
1058
+ | 'AssertWithoutThrow'
1059
+ | 'Sleep'
1060
+ | 'Finished'
1061
+ | 'AndroidBackButton'
1062
+ | 'AndroidHomeButton'
1063
+ | 'AndroidRecentAppsButton'
1064
+ | 'AndroidLongPress'
1065
+ | 'AndroidPull';
1066
+ param: ParamType;
1067
+ locate?: PlanningLocateParam_2 | null;
1068
+ }
1069
+
1070
+ export declare interface PlanningActionParamAndroidLongPress {
1071
+ x: number;
1072
+ y: number;
1073
+ duration?: number;
1074
+ }
1075
+
1076
+ export declare interface PlanningActionParamAndroidPull {
1077
+ direction: 'up' | 'down';
1078
+ startPoint?: {
1079
+ x: number;
1080
+ y: number;
1081
+ };
1082
+ distance?: number;
1083
+ duration?: number;
1084
+ }
1085
+
1086
+ export declare interface PlanningActionParamAssert {
1087
+ assertion: TUserPrompt;
1088
+ }
1089
+
1090
+ export declare interface PlanningActionParamError {
1091
+ thought: string;
1092
+ }
1093
+
1094
+ export declare type PlanningActionParamHover = null;
1095
+
1096
+ export declare interface PlanningActionParamInputOrKeyPress {
1097
+ value: string;
1098
+ autoDismissKeyboard?: boolean;
1099
+ }
1100
+
1101
+ export declare type PlanningActionParamRightClick = null;
1102
+
1103
+ export declare type PlanningActionParamScroll = scrollParam;
1104
+
1105
+ declare type PlanningActionParamScroll_2 = scrollParam_2;
1106
+
1107
+ export declare interface PlanningActionParamSleep {
1108
+ timeMs: number;
1109
+ }
1110
+
1111
+ export declare type PlanningActionParamTap = null;
1112
+
1113
+ export declare type PlanningActionParamWaitFor = AgentWaitForOpt & {
1114
+ assertion: string;
1115
+ };
1116
+
1117
+ export declare interface PlanningAIResponse {
1118
+ action?: PlanningAction; // this is the qwen mode
1119
+ actions?: PlanningAction[];
1120
+ more_actions_needed_by_instruction: boolean;
1121
+ log: string;
1122
+ sleep?: number;
1123
+ error?: string;
1124
+ usage?: AIUsageInfo;
1125
+ rawResponse?: string;
1126
+ yamlFlow?: MidsceneYamlFlowItem[];
1127
+ yamlString?: string;
1128
+ }
1129
+
1130
+ declare interface PlanningAIResponse_2 {
1131
+ action?: PlanningAction_2; // this is the qwen mode
1132
+ actions?: PlanningAction_2[];
1133
+ more_actions_needed_by_instruction: boolean;
1134
+ log: string;
1135
+ sleep?: number;
1136
+ error?: string;
1137
+ usage?: AIUsageInfo_2;
1138
+ rawResponse?: string;
1139
+ yamlFlow?: MidsceneYamlFlowItem_2[];
1140
+ yamlString?: string;
1141
+ }
1142
+
1143
+ /**
1144
+ * planning
1145
+ *
1146
+ */
1147
+ export declare interface PlanningLocateParam extends DetailedLocateParam {
1148
+ id?: string;
1149
+ bbox?: [number, number, number, number];
1150
+ }
1151
+
1152
+ /**
1153
+ * planning
1154
+ *
1155
+ */
1156
+
1157
+ declare interface PlanningLocateParam_2 extends DetailedLocateParam_2 {
1158
+ id?: string;
1159
+ bbox?: [number, number, number, number];
1160
+ }
1161
+
1162
+ export declare interface PlaywrightParserOpt extends BaseAgentParserOpt {
1163
+ }
1164
+
1165
+ export { Point }
1166
+
1167
+ export declare interface PuppeteerParserOpt extends BaseAgentParserOpt {
1168
+ }
1169
+
1170
+ export { Rect }
1171
+
1172
+ export declare interface ReferenceImage {
1173
+ base64: string;
1174
+ rect?: Rect;
1175
+ }
1176
+
1177
+ declare interface ReferenceImage_2 {
1178
+ base64: string;
1179
+ rect?: Rect;
1180
+ }
1181
+
1182
+ export declare interface ReportDumpWithAttributes {
1183
+ dumpString: string;
1184
+ attributes?: Record<string, any>;
1185
+ }
1186
+
1187
+ export declare type ScriptPlayerStatusValue = 'init' | 'running' | 'done' | 'error';
1188
+
1189
+ export declare interface ScriptPlayerTaskStatus extends MidsceneYamlTask {
1190
+ status: ScriptPlayerStatusValue;
1191
+ currentStep?: number;
1192
+ totalSteps: number;
1193
+ error?: Error;
1194
+ }
1195
+
1196
+ export declare interface scrollParam {
1197
+ direction: 'down' | 'up' | 'right' | 'left';
1198
+ scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
1199
+ distance?: null | number; // distance in px
1200
+ }
1201
+
1202
+ declare interface scrollParam_2 {
1203
+ direction: 'down' | 'up' | 'right' | 'left';
1204
+ scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
1205
+ distance?: null | number; // distance in px
1206
+ }
1207
+
1208
+ export { Size }
1209
+
1210
+ export declare interface StreamingAIResponse {
1211
+ /** The final accumulated content */
1212
+ content: string;
1213
+ /** Token usage information */
1214
+ usage?: AIUsageInfo;
1215
+ /** Whether the response was streamed */
1216
+ isStreamed: boolean;
1217
+ }
1218
+
1219
+ export declare type StreamingCallback = (chunk: CodeGenerationChunk) => void;
1220
+
1221
+ export declare interface StreamingCodeGenerationOptions {
1222
+ /** Whether to enable streaming output */
1223
+ stream?: boolean;
1224
+ /** Callback function to handle streaming chunks */
1225
+ onChunk?: StreamingCallback;
1226
+ /** Callback function to handle streaming completion */
1227
+ onComplete?: (finalCode: string) => void;
1228
+ /** Callback function to handle streaming errors */
1229
+ onError?: (error: Error) => void;
1230
+ }
1231
+
1232
+ export declare type TMultimodalPrompt = {
1233
+ /**
1234
+ * Supports using images to inspect elements.
1235
+ * The "images" field is an array of entries, each pairing an image `name` with its `url`.
1236
+ * The image url can be a local path, an http link, or a base64 string.
1237
+ */
1238
+ images?: {
1239
+ name: string;
1240
+ url: string;
1241
+ }[];
1242
+ /**
1243
+ * By default, an image url in the "images" field that starts with `https://` or `http://` is sent directly to the LLM.
1244
+ * If the images are not accessible to the LLM (a common case is an image url that is internal-network only), you can enable this option.
1245
+ * The image will then be downloaded and converted to base64 format.
1246
+ */
1247
+ convertHttpImage2Base64?: boolean;
1248
+ };
1249
+
1250
+ declare type TMultimodalPrompt_2 = {
1251
+ /**
1252
+ * Supports using images to inspect elements.
1253
+ * The "images" field is an array of entries, each pairing an image `name` with its `url`.
1254
+ * The image url can be a local path, an http link, or a base64 string.
1255
+ */
1256
+ images?: {
1257
+ name: string;
1258
+ url: string;
1259
+ }[];
1260
+ /**
1261
+ * By default, an image url in the "images" field that starts with `https://` or `http://` is sent directly to the LLM.
1262
+ * If the images are not accessible to the LLM (a common case is an image url that is internal-network only), you can enable this option.
1263
+ * The image will then be downloaded and converted to base64 format.
1264
+ */
1265
+ convertHttpImage2Base64?: boolean;
1266
+ };
1267
+
1268
+ export declare type TUserPrompt = string | ({
1269
+ prompt: string;
1270
+ } & Partial<TMultimodalPrompt>);
1271
+
1272
+ declare type TUserPrompt_2 =
1273
+ | string
1274
+ | ({
1275
+ prompt: string;
1276
+ } & Partial<TMultimodalPrompt_2>);
1277
+
1278
+ /**
1279
+ * context
1280
+ */
1281
+ export declare abstract class UIContext<ElementType extends BaseElement = BaseElement> {
1282
+ abstract screenshotBase64: string;
1283
+ abstract tree: ElementTreeNode<ElementType>;
1284
+ abstract size: Size;
1285
+ }
1286
+
1287
+ /**
1288
+ * context
1289
+ */
1290
+
1291
+ declare abstract class UIContext_2<ElementType extends BaseElement = BaseElement> {
1292
+ abstract screenshotBase64: string;
1293
+
1294
+ abstract tree: ElementTreeNode<ElementType>;
1295
+
1296
+ abstract size: Size;
1297
+ }
1298
+
1299
+ export { }