@midscene/core 0.26.3-beta-20250813113828.0 → 0.26.3-beta-20250814023647.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import { ChatCompletionMessageParam } from 'openai/resources/index';
4
4
  import type { ChatCompletionSystemMessageParam } from 'openai/resources/index';
5
5
  import type { ChatCompletionUserMessageParam } from 'openai/resources/index';
6
6
  import { ElementTreeNode } from '@midscene/shared/types';
7
+ import type { NodeType } from '@midscene/shared/constants';
7
8
  import OpenAI from 'openai';
8
9
  import { Rect } from '@midscene/shared/types';
9
10
  import { Size } from '@midscene/shared/types';
@@ -206,7 +207,7 @@ declare interface DeviceAction<ParamType = any> {
206
207
  paramDescription?: string;
207
208
  location?: 'required' | 'optional' | false;
208
209
  whatToLocate?: string; // what to locate if location is required or optional
209
- call: (param: ParamType) => Promise<void> | void;
210
+ call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;
210
211
  }
211
212
 
212
213
  declare type ElementById = (id: string) => BaseElement | null;
@@ -219,6 +220,85 @@ export declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNo
219
220
  filterPositionElements?: boolean;
220
221
  }): BaseElement | undefined;
221
222
 
223
+ declare interface ExecutionRecorderItem {
224
+ type: 'screenshot';
225
+ ts: number;
226
+ screenshot?: string;
227
+ timing?: string;
228
+ }
229
+
230
+ declare type ExecutionTask<
231
+ E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<
232
+ any,
233
+ any,
234
+ any
235
+ >,
236
+ > = E &
237
+ ExecutionTaskReturn<
238
+ E extends ExecutionTaskApply<any, any, infer TaskOutput, any>
239
+ ? TaskOutput
240
+ : unknown,
241
+ E extends ExecutionTaskApply<any, any, any, infer TaskLog>
242
+ ? TaskLog
243
+ : unknown
244
+ > & {
245
+ status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
246
+ error?: Error;
247
+ errorMessage?: string;
248
+ errorStack?: string;
249
+ timing?: {
250
+ start: number;
251
+ end?: number;
252
+ cost?: number;
253
+ };
254
+ usage?: AIUsageInfo;
255
+ };
256
+
257
+ declare interface ExecutionTaskApply<
258
+ Type extends ExecutionTaskType = any,
259
+ TaskParam = any,
260
+ TaskOutput = any,
261
+ TaskLog = any,
262
+ > {
263
+ type: Type;
264
+ subType?: string;
265
+ param?: TaskParam;
266
+ thought?: string;
267
+ locate?: PlanningLocateParam | null;
268
+ pageContext?: UIContext;
269
+ executor: (
270
+ param: TaskParam,
271
+ context: ExecutorContext,
272
+ ) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>
273
+ | Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>
274
+ | undefined
275
+ | void;
276
+ }
277
+
278
+ declare interface ExecutionTaskHitBy {
279
+ from: string;
280
+ context: Record<string, any>;
281
+ }
282
+
283
+ declare interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
284
+ output?: TaskOutput;
285
+ log?: TaskLog;
286
+ recorder?: ExecutionRecorderItem[];
287
+ hitBy?: ExecutionTaskHitBy;
288
+ }
289
+
290
+ declare type ExecutionTaskType =
291
+ | 'Planning'
292
+ | 'Insight'
293
+ | 'Action'
294
+ | 'Assertion'
295
+ | 'Log';
296
+
297
+ declare interface ExecutorContext {
298
+ task: ExecutionTask;
299
+ element?: LocateResultElement | null;
300
+ }
301
+
222
302
  /**
223
303
  * Generates Playwright test code from recorded events
224
304
  */
@@ -252,6 +332,19 @@ declare interface LocateOption {
252
332
  pageContext?: UIContext<BaseElement>;
253
333
  }
254
334
 
335
+ declare type LocateResultElement = {
336
+ id: string;
337
+ indexId?: number;
338
+ center: [number, number];
339
+ rect: Rect;
340
+ xpaths: string[];
341
+ attributes: {
342
+ nodeType: NodeType;
343
+ [key: string]: string;
344
+ };
345
+ isOrderSensitive?: boolean;
346
+ };
347
+
255
348
  declare type MidsceneYamlFlowItem =
256
349
  | MidsceneYamlFlowItemAIAction
257
350
  | MidsceneYamlFlowItemAIAssert
@@ -303,7 +396,7 @@ declare interface MidsceneYamlFlowItemAIRightClick extends LocateOption {
303
396
 
304
397
  declare interface MidsceneYamlFlowItemAIScroll
305
398
  extends LocateOption,
306
- PlanningActionParamScroll {
399
+ ScrollParam {
307
400
  aiScroll: null;
308
401
  locate?: TUserPrompt; // which area to scroll, optional
309
402
  }
@@ -367,8 +460,6 @@ declare interface PlanningAction<ParamType = any> {
367
460
  locate?: PlanningLocateParam | null;
368
461
  }
369
462
 
370
- declare type PlanningActionParamScroll = scrollParam;
371
-
372
463
  declare interface PlanningAIResponse {
373
464
  action?: PlanningAction; // this is the qwen mode
374
465
  actions?: PlanningAction[];
@@ -413,7 +504,7 @@ declare interface ReferenceImage {
413
504
 
414
505
  export declare function resizeImageForUiTars(imageBase64: string, size: Size): Promise<string>;
415
506
 
416
- declare interface scrollParam {
507
+ declare interface ScrollParam {
417
508
  direction: 'down' | 'up' | 'right' | 'left';
418
509
  scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
419
510
  distance?: null | number; // distance in px
@@ -208,6 +208,16 @@ declare type AIUsageInfo_2 = Record<string, any> & {
208
208
  time_cost: number | undefined;
209
209
  };
210
210
 
211
+ export declare interface AndroidLongPressParam {
212
+ duration?: number;
213
+ }
214
+
215
+ export declare interface AndroidPullParam {
216
+ direction: 'up' | 'down';
217
+ distance?: number;
218
+ duration?: number;
219
+ }
220
+
211
221
  export declare interface BaseAgentParserOpt {
212
222
  selector?: string;
213
223
  ignoreMarker?: boolean;
@@ -308,7 +318,7 @@ export declare interface DeviceAction<ParamType = any> {
308
318
  paramDescription?: string;
309
319
  location?: 'required' | 'optional' | false;
310
320
  whatToLocate?: string;
311
- call: (param: ParamType) => Promise<void> | void;
321
+ call: (context: ExecutorContext, param: ParamType) => Promise<void> | void;
312
322
  }
313
323
 
314
324
  declare interface DeviceAction_2<ParamType = any> {
@@ -318,7 +328,7 @@ declare interface DeviceAction_2<ParamType = any> {
318
328
  paramDescription?: string;
319
329
  location?: 'required' | 'optional' | false;
320
330
  whatToLocate?: string; // what to locate if location is required or optional
321
- call: (param: ParamType) => Promise<void> | void;
331
+ call: (context: ExecutorContext_2, param: ParamType) => Promise<void> | void;
322
332
  }
323
333
 
324
334
  export declare interface DumpMeta {
@@ -908,14 +918,14 @@ declare interface MidsceneYamlFlowItemAIRightClick_2 extends LocateOption_2 {
908
918
  aiRightClick: TUserPrompt_2;
909
919
  }
910
920
 
911
- export declare interface MidsceneYamlFlowItemAIScroll extends LocateOption, PlanningActionParamScroll {
921
+ export declare interface MidsceneYamlFlowItemAIScroll extends LocateOption, ScrollParam {
912
922
  aiScroll: null;
913
923
  locate?: TUserPrompt;
914
924
  }
915
925
 
916
926
  declare interface MidsceneYamlFlowItemAIScroll_2
917
927
  extends LocateOption_2,
918
- PlanningActionParamScroll_2 {
928
+ ScrollParam_2 {
919
929
  aiScroll: null;
920
930
  locate?: TUserPrompt_2; // which area to scroll, optional
921
931
  }
@@ -1067,22 +1077,6 @@ declare interface PlanningAction_2<ParamType = any> {
1067
1077
  locate?: PlanningLocateParam_2 | null;
1068
1078
  }
1069
1079
 
1070
- export declare interface PlanningActionParamAndroidLongPress {
1071
- x: number;
1072
- y: number;
1073
- duration?: number;
1074
- }
1075
-
1076
- export declare interface PlanningActionParamAndroidPull {
1077
- direction: 'up' | 'down';
1078
- startPoint?: {
1079
- x: number;
1080
- y: number;
1081
- };
1082
- distance?: number;
1083
- duration?: number;
1084
- }
1085
-
1086
1080
  export declare interface PlanningActionParamAssert {
1087
1081
  assertion: TUserPrompt;
1088
1082
  }
@@ -1100,10 +1094,6 @@ export declare interface PlanningActionParamInputOrKeyPress {
1100
1094
 
1101
1095
  export declare type PlanningActionParamRightClick = null;
1102
1096
 
1103
- export declare type PlanningActionParamScroll = scrollParam;
1104
-
1105
- declare type PlanningActionParamScroll_2 = scrollParam_2;
1106
-
1107
1097
  export declare interface PlanningActionParamSleep {
1108
1098
  timeMs: number;
1109
1099
  }
@@ -1193,13 +1183,13 @@ export declare interface ScriptPlayerTaskStatus extends MidsceneYamlTask {
1193
1183
  error?: Error;
1194
1184
  }
1195
1185
 
1196
- export declare interface scrollParam {
1186
+ export declare interface ScrollParam {
1197
1187
  direction: 'down' | 'up' | 'right' | 'left';
1198
1188
  scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
1199
1189
  distance?: null | number;
1200
1190
  }
1201
1191
 
1202
- declare interface scrollParam_2 {
1192
+ declare interface ScrollParam_2 {
1203
1193
  direction: 'down' | 'up' | 'right' | 'left';
1204
1194
  scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
1205
1195
  distance?: null | number; // distance in px
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "0.26.3-beta-20250813113828.0",
4
+ "version": "0.26.3-beta-20250814023647.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -60,8 +60,8 @@
60
60
  "langsmith": "0.3.7",
61
61
  "openai": "4.81.0",
62
62
  "socks-proxy-agent": "8.0.4",
63
- "@midscene/recorder": "0.26.3-beta-20250813113828.0",
64
- "@midscene/shared": "0.26.3-beta-20250813113828.0"
63
+ "@midscene/recorder": "0.26.3-beta-20250814023647.0",
64
+ "@midscene/shared": "0.26.3-beta-20250814023647.0"
65
65
  },
66
66
  "devDependencies": {
67
67
  "@microsoft/api-extractor": "^7.52.10",