@midscene/core 1.0.1-beta-20251024063839.0 → 1.0.1-beta-20251024064637.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. package/dist/es/agent/agent.mjs +2 -3
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/index.mjs +2 -2
  4. package/dist/es/agent/task-builder.mjs +11 -7
  5. package/dist/es/agent/task-builder.mjs.map +1 -1
  6. package/dist/es/agent/tasks.mjs +8 -1
  7. package/dist/es/agent/tasks.mjs.map +1 -1
  8. package/dist/es/agent/ui-utils.mjs +10 -10
  9. package/dist/es/agent/ui-utils.mjs.map +1 -1
  10. package/dist/es/agent/utils.mjs +6 -50
  11. package/dist/es/agent/utils.mjs.map +1 -1
  12. package/dist/es/ai-model/common.mjs.map +1 -1
  13. package/dist/es/ai-model/index.mjs +2 -2
  14. package/dist/es/ai-model/inspect.mjs +12 -31
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/prompt/util.mjs +3 -88
  17. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  18. package/dist/es/device/index.mjs.map +1 -1
  19. package/dist/es/insight/index.mjs +6 -15
  20. package/dist/es/insight/index.mjs.map +1 -1
  21. package/dist/es/tree.mjs +13 -2
  22. package/dist/es/tree.mjs.map +1 -0
  23. package/dist/es/types.mjs.map +1 -1
  24. package/dist/es/utils.mjs +2 -2
  25. package/dist/lib/agent/agent.js +1 -2
  26. package/dist/lib/agent/agent.js.map +1 -1
  27. package/dist/lib/agent/index.js +0 -3
  28. package/dist/lib/agent/task-builder.js +11 -7
  29. package/dist/lib/agent/task-builder.js.map +1 -1
  30. package/dist/lib/agent/tasks.js +8 -1
  31. package/dist/lib/agent/tasks.js.map +1 -1
  32. package/dist/lib/agent/ui-utils.js +10 -10
  33. package/dist/lib/agent/ui-utils.js.map +1 -1
  34. package/dist/lib/agent/utils.js +5 -52
  35. package/dist/lib/agent/utils.js.map +1 -1
  36. package/dist/lib/ai-model/common.js.map +1 -1
  37. package/dist/lib/ai-model/index.js +11 -14
  38. package/dist/lib/ai-model/inspect.js +11 -30
  39. package/dist/lib/ai-model/inspect.js.map +1 -1
  40. package/dist/lib/ai-model/prompt/util.js +5 -93
  41. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  42. package/dist/lib/device/index.js.map +1 -1
  43. package/dist/lib/insight/index.js +6 -15
  44. package/dist/lib/insight/index.js.map +1 -1
  45. package/dist/lib/tree.js +10 -1
  46. package/dist/lib/tree.js.map +1 -1
  47. package/dist/lib/types.js.map +1 -1
  48. package/dist/lib/utils.js +2 -2
  49. package/dist/types/agent/agent.d.ts +1 -1
  50. package/dist/types/agent/index.d.ts +1 -1
  51. package/dist/types/agent/utils.d.ts +2 -33
  52. package/dist/types/ai-model/index.d.ts +1 -1
  53. package/dist/types/ai-model/inspect.d.ts +12 -10
  54. package/dist/types/ai-model/prompt/util.d.ts +2 -34
  55. package/dist/types/device/index.d.ts +4 -5
  56. package/dist/types/insight/index.d.ts +6 -6
  57. package/dist/types/tree.d.ts +4 -1
  58. package/dist/types/types.d.ts +6 -30
  59. package/dist/types/yaml.d.ts +7 -5
  60. package/package.json +3 -3
  61. package/dist/es/device/device-options.mjs +0 -0
  62. package/dist/lib/device/device-options.js +0 -20
  63. package/dist/lib/device/device-options.js.map +0 -1
  64. package/dist/types/device/device-options.d.ts +0 -57
@@ -118,7 +118,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
118
118
  deepThink?: boolean;
119
119
  } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
120
120
  verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
121
- aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center"> & {
121
+ aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "center" | "rect"> & {
122
122
  dpr?: number;
123
123
  }>;
124
124
  aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & InsightExtractOption): Promise<{
@@ -5,5 +5,5 @@ export { locateParamStr, paramStr, taskTitleStr, typeStr } from './ui-utils';
5
5
  export { type LocateCache, type PlanningCache, TaskCache } from './task-cache';
6
6
  export { cacheFileExt } from './task-cache';
7
7
  export { TaskExecutor } from './tasks';
8
- export { getCurrentExecutionFile, trimContextByViewport, } from './utils';
8
+ export { getCurrentExecutionFile } from './utils';
9
9
  export type { AgentOpt } from '../types';
@@ -1,6 +1,6 @@
1
1
  import type { TMultimodalPrompt, TUserPrompt } from '../ai-model/common';
2
2
  import type { AbstractInterface } from '../device';
3
- import type { BaseElement, ElementCacheFeature, ElementTreeNode, ExecutionDump, ExecutorContext, LocateResultElement, PlanningLocateParam, UIContext } from '../types';
3
+ import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, UIContext } from '../types';
4
4
  import type { TaskCache } from './task-cache';
5
5
  export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
6
6
  uploadServerUrl?: string;
@@ -13,42 +13,11 @@ export declare function printReportMsg(filepath: string): void;
13
13
  */
14
14
  export declare function getCurrentExecutionFile(trace?: string): string | false;
15
15
  export declare function generateCacheId(fileName?: string): string;
16
- export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam, tree: ElementTreeNode<BaseElement>): any;
16
+ export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam): LocateResultElement | undefined;
17
17
  export declare function matchElementFromCache(context: {
18
18
  taskCache?: TaskCache;
19
19
  interfaceInstance: AbstractInterface;
20
20
  }, cacheEntry: ElementCacheFeature | undefined, cachePrompt: TUserPrompt, cacheable: boolean | undefined): Promise<LocateResultElement | undefined>;
21
- export declare function trimContextByViewport(execution: ExecutionDump): {
22
- tasks: {
23
- type: any;
24
- subType?: string;
25
- subTask?: boolean;
26
- param?: any;
27
- thought?: string;
28
- locate?: PlanningLocateParam | null;
29
- uiContext?: UIContext;
30
- executor: (param: any, context: ExecutorContext) => void | Promise<void | import("../types").ExecutionTaskReturn<any, any> | undefined> | undefined;
31
- output?: any;
32
- log?: any;
33
- recorder?: import("../types").ExecutionRecorderItem[];
34
- hitBy?: import("../types").ExecutionTaskHitBy;
35
- status: "pending" | "running" | "finished" | "failed" | "cancelled";
36
- error?: Error;
37
- errorMessage?: string;
38
- errorStack?: string;
39
- timing?: {
40
- start: number;
41
- end?: number;
42
- cost?: number;
43
- };
44
- usage?: import("../types").AIUsageInfo;
45
- searchAreaUsage?: import("../types").AIUsageInfo;
46
- }[];
47
- name: string;
48
- description?: string;
49
- aiActionContext?: string;
50
- logTime: number;
51
- };
52
21
  export declare const getMidsceneVersion: () => string;
53
22
  export declare const parsePrompt: (prompt: TUserPrompt) => {
54
23
  textPrompt: string;
@@ -1,6 +1,6 @@
1
1
  export { callAIWithStringResponse, callAIWithObjectResponse, callAI, } from './service-caller/index';
2
2
  export { systemPromptToLocateElement } from './prompt/llm-locator';
3
- export { describeUserPage, elementByPositionWithElementInfo, } from './prompt/util';
3
+ export { describeUserPage } from './prompt/util';
4
4
  export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
5
5
  export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
6
6
  export type { ChatCompletionMessageParam } from 'openai/resources/index';
@@ -1,5 +1,6 @@
1
- import type { AIDataExtractionResponse, AIElementLocatorResponse, AIElementResponse, AIUsageInfo, BaseElement, ElementById, InsightExtractOption, Rect, ReferenceImage, UIContext } from '../types';
1
+ import type { AIDataExtractionResponse, AIElementResponse, AIUsageInfo, InsightExtractOption, Rect, ReferenceImage, UIContext } from '../types';
2
2
  import type { IModelConfig } from '@midscene/shared/env';
3
+ import type { LocateResultElement } from '@midscene/shared/types';
3
4
  import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
4
5
  import type { TMultimodalPrompt, TUserPrompt } from './common';
5
6
  import { callAIWithObjectResponse } from './service-caller/index';
@@ -7,23 +8,24 @@ export type AIArgs = [
7
8
  ChatCompletionSystemMessageParam,
8
9
  ...ChatCompletionUserMessageParam[]
9
10
  ];
10
- export declare function AiLocateElement<ElementType extends BaseElement = BaseElement>(options: {
11
- context: UIContext<ElementType>;
11
+ export declare function AiLocateElement(options: {
12
+ context: UIContext;
12
13
  targetElementDescription: TUserPrompt;
13
14
  referenceImage?: ReferenceImage;
14
15
  callAIFn: typeof callAIWithObjectResponse<AIElementResponse | [number, number]>;
15
16
  searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
16
17
  modelConfig: IModelConfig;
17
18
  }): Promise<{
18
- parseResult: AIElementLocatorResponse;
19
+ parseResult: {
20
+ elements: LocateResultElement[];
21
+ errors?: string[];
22
+ };
19
23
  rect?: Rect;
20
24
  rawResponse: string;
21
- elementById: ElementById;
22
25
  usage?: AIUsageInfo;
23
- isOrderSensitive?: boolean;
24
26
  }>;
25
27
  export declare function AiLocateSection(options: {
26
- context: UIContext<BaseElement>;
28
+ context: UIContext;
27
29
  sectionDescription: TUserPrompt;
28
30
  modelConfig: IModelConfig;
29
31
  }): Promise<{
@@ -33,14 +35,14 @@ export declare function AiLocateSection(options: {
33
35
  rawResponse: string;
34
36
  usage?: AIUsageInfo;
35
37
  }>;
36
- export declare function AiExtractElementInfo<T, ElementType extends BaseElement = BaseElement>(options: {
38
+ export declare function AiExtractElementInfo<T>(options: {
37
39
  dataQuery: string | Record<string, string>;
38
40
  multimodalPrompt?: TMultimodalPrompt;
39
- context: UIContext<ElementType>;
41
+ context: UIContext;
42
+ pageDescription?: string;
40
43
  extractOption?: InsightExtractOption;
41
44
  modelConfig: IModelConfig;
42
45
  }): Promise<{
43
46
  parseResult: AIDataExtractionResponse<T>;
44
- elementById: (idOrIndexId: string) => ElementType;
45
47
  usage: AIUsageInfo | undefined;
46
48
  }>;
@@ -1,17 +1,9 @@
1
- import type { BaseElement, ElementTreeNode, Size, UIContext } from '../../types';
2
- import type { TVlModeTypes } from '@midscene/shared/env';
1
+ import type { BaseElement, Size, UIContext } from '../../types';
3
2
  export declare function describeSize(size: Size): string;
4
3
  export declare function describeElement(elements: (Pick<BaseElement, 'rect' | 'content'> & {
5
4
  id: string;
6
5
  })[]): string;
7
6
  export declare const distanceThreshold = 16;
8
- export declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNode<BaseElement>, position: {
9
- x: number;
10
- y: number;
11
- }, options?: {
12
- requireStrictDistance?: boolean;
13
- filterPositionElements?: boolean;
14
- }): BaseElement | undefined;
15
7
  export declare function distance(point1: {
16
8
  x: number;
17
9
  y: number;
@@ -20,28 +12,4 @@ export declare function distance(point1: {
20
12
  y: number;
21
13
  }): number;
22
14
  export declare const samplePageDescription = "\nAnd the page is described as follows:\n====================\nThe size of the page: 1280 x 720\nSome of the elements are marked with a rectangle in the screenshot corresponding to the markerId, some are not.\n\nDescription of all the elements in screenshot:\n<div id=\"969f1637\" markerId=\"1\" left=\"100\" top=\"100\" width=\"100\" height=\"100\"> // The markerId indicated by the rectangle label in the screenshot\n <h4 id=\"b211ecb2\" markerId=\"5\" left=\"150\" top=\"150\" width=\"90\" height=\"60\">\n The username is accepted\n </h4>\n ...many more\n</div>\n====================\n";
23
- export declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext<ElementType>, 'describer'>, opt: {
24
- truncateTextLength?: number;
25
- filterNonTextContent?: boolean;
26
- domIncluded?: boolean | 'visible-only';
27
- visibleOnly?: boolean;
28
- vlMode: TVlModeTypes | undefined;
29
- }): Promise<{
30
- description: string;
31
- elementById(idOrIndexId: string): ElementType;
32
- elementByPosition(position: {
33
- x: number;
34
- y: number;
35
- }, size: {
36
- width: number;
37
- height: number;
38
- }): BaseElement | undefined;
39
- insertElementByPosition(position: {
40
- x: number;
41
- y: number;
42
- }): ElementType;
43
- size: {
44
- width: number;
45
- height: number;
46
- };
47
- }>;
15
+ export declare function describeUserPage(context: UIContext): Promise<string>;
@@ -1106,7 +1106,7 @@ export declare const actionScrollParamSchema: z.ZodObject<{
1106
1106
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
1107
1107
  }, z.ZodTypeAny, "passthrough">>>;
1108
1108
  }, "strip", z.ZodTypeAny, {
1109
- direction: "up" | "down" | "right" | "left";
1109
+ direction: "down" | "up" | "right" | "left";
1110
1110
  scrollType: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft";
1111
1111
  locate?: z.objectOutputType<{
1112
1112
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
@@ -1182,7 +1182,7 @@ export declare const actionScrollParamSchema: z.ZodObject<{
1182
1182
  cacheable: z.ZodOptional<z.ZodBoolean>;
1183
1183
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
1184
1184
  }, z.ZodTypeAny, "passthrough"> | undefined;
1185
- direction?: "up" | "down" | "right" | "left" | undefined;
1185
+ direction?: "down" | "up" | "right" | "left" | undefined;
1186
1186
  scrollType?: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft" | undefined;
1187
1187
  distance?: number | null | undefined;
1188
1188
  }>;
@@ -1848,7 +1848,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
1848
1848
  }, "strip", z.ZodTypeAny, {
1849
1849
  duration: number;
1850
1850
  repeat?: number | undefined;
1851
- direction?: "up" | "down" | "right" | "left" | undefined;
1851
+ direction?: "down" | "up" | "right" | "left" | undefined;
1852
1852
  distance?: number | undefined;
1853
1853
  start?: z.objectOutputType<{
1854
1854
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
@@ -1924,7 +1924,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
1924
1924
  }, z.ZodTypeAny, "passthrough"> | undefined;
1925
1925
  }, {
1926
1926
  repeat?: number | undefined;
1927
- direction?: "up" | "down" | "right" | "left" | undefined;
1927
+ direction?: "down" | "up" | "right" | "left" | undefined;
1928
1928
  distance?: number | undefined;
1929
1929
  duration?: number | undefined;
1930
1930
  start?: z.objectInputType<{
@@ -2156,4 +2156,3 @@ export type ActionClearInputParam = {
2156
2156
  };
2157
2157
  export declare const defineActionClearInput: (call: (param: ActionClearInputParam) => Promise<void>) => DeviceAction<ActionClearInputParam>;
2158
2158
  export type { DeviceAction } from '../types';
2159
- export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
@@ -1,9 +1,9 @@
1
1
  import { callAIWithObjectResponse } from '../ai-model/index';
2
- import type { AIDescribeElementResponse, BaseElement, DetailedLocateParam, InsightExtractOption, InsightExtractParam, InsightExtractResult, InsightTaskInfo, LocateResultWithDump, Rect, UIContext } from '../types';
2
+ import type { AIDescribeElementResponse, DetailedLocateParam, InsightExtractOption, InsightExtractParam, InsightExtractResult, InsightTaskInfo, LocateResultWithDump, Rect, UIContext } from '../types';
3
3
  import { type IModelConfig } from '@midscene/shared/env';
4
4
  import type { TMultimodalPrompt } from '../ai-model/common';
5
5
  export interface LocateOpts {
6
- context?: UIContext<BaseElement>;
6
+ context?: UIContext;
7
7
  }
8
8
  export type AnyValue<T> = {
9
9
  [K in keyof T]: unknown extends T[K] ? any : T[K];
@@ -12,13 +12,13 @@ interface InsightOptions {
12
12
  taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
13
13
  aiVendorFn?: typeof callAIWithObjectResponse;
14
14
  }
15
- export default class Insight<ElementType extends BaseElement = BaseElement, ContextType extends UIContext<ElementType> = UIContext<ElementType>> {
16
- contextRetrieverFn: () => Promise<ContextType> | ContextType;
15
+ export default class Insight {
16
+ contextRetrieverFn: () => Promise<UIContext> | UIContext;
17
17
  aiVendorFn: Exclude<InsightOptions['aiVendorFn'], undefined>;
18
18
  taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
19
- constructor(context: ContextType | (() => Promise<ContextType> | ContextType), opt?: InsightOptions);
19
+ constructor(context: UIContext | (() => Promise<UIContext> | UIContext), opt?: InsightOptions);
20
20
  locate(query: DetailedLocateParam, opt: LocateOpts, modelConfig: IModelConfig): Promise<LocateResultWithDump>;
21
- extract<T>(dataDemand: InsightExtractParam, modelConfig: IModelConfig, opt?: InsightExtractOption, multimodalPrompt?: TMultimodalPrompt): Promise<InsightExtractResult<T>>;
21
+ extract<T>(dataDemand: InsightExtractParam, modelConfig: IModelConfig, opt?: InsightExtractOption, pageDescription?: string, multimodalPrompt?: TMultimodalPrompt): Promise<InsightExtractResult<T>>;
22
22
  describe(target: Rect | [number, number], modelConfig: IModelConfig, opt?: {
23
23
  deepThink?: boolean;
24
24
  }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
@@ -1 +1,4 @@
1
- export { truncateText, trimAttributes, descriptionOfTree, } from '@midscene/shared/extractor';
1
+ import type { BaseElement, ElementTreeNode } from '@midscene/shared/types';
2
+ import { trimAttributes, truncateText } from '@midscene/shared/extractor';
3
+ export { trimAttributes, truncateText };
4
+ export declare function descriptionOfTree<ElementType extends BaseElement = BaseElement>(tree: ElementTreeNode<ElementType>, truncateTextLength?: number, filterNonTextContent?: boolean, visibleOnly?: boolean): string;
@@ -1,6 +1,6 @@
1
1
  import type { NodeType } from '@midscene/shared/constants';
2
2
  import type { CreateOpenAIClientFn, TModelConfigFn } from '@midscene/shared/env';
3
- import type { BaseElement, ElementTreeNode, Rect, Size } from '@midscene/shared/types';
3
+ import type { BaseElement, LocateResultElement, Rect, Size } from '@midscene/shared/types';
4
4
  import type { z } from 'zod';
5
5
  import type { TUserPrompt } from './ai-model/common';
6
6
  import type { DetailedLocateParam, MidsceneYamlFlowItem } from './yaml';
@@ -15,6 +15,7 @@ export type AIUsageInfo = Record<string, any> & {
15
15
  model_description: string | undefined;
16
16
  intent: string | undefined;
17
17
  };
18
+ export type { LocateResultElement };
18
19
  /**
19
20
  * openai
20
21
  *
@@ -39,23 +40,12 @@ export type AISingleElementResponseByPosition = {
39
40
  text: string;
40
41
  };
41
42
  export type AISingleElementResponse = AISingleElementResponseById;
42
- export interface AIElementLocatorResponse {
43
- elements: {
44
- id: string;
45
- reason?: string;
46
- text?: string;
47
- xpaths?: string[];
48
- }[];
49
- bbox?: [number, number, number, number];
50
- isOrderSensitive?: boolean;
51
- errors?: string[];
52
- }
53
43
  export interface AIElementCoordinatesResponse {
54
44
  bbox: [number, number, number, number];
55
45
  isOrderSensitive?: boolean;
56
46
  errors?: string[];
57
47
  }
58
- export type AIElementResponse = AIElementLocatorResponse | AIElementCoordinatesResponse;
48
+ export type AIElementResponse = AIElementCoordinatesResponse;
59
49
  export interface AIDataExtractionResponse<DataDemand> {
60
50
  data: DataDemand;
61
51
  errors?: string[];
@@ -91,9 +81,8 @@ export interface AgentDescribeElementAtPointResult {
91
81
  /**
92
82
  * context
93
83
  */
94
- export declare abstract class UIContext<ElementType extends BaseElement = BaseElement> {
84
+ export declare abstract class UIContext {
95
85
  abstract screenshotBase64: string;
96
- abstract tree: ElementTreeNode<ElementType>;
97
86
  abstract size: Size;
98
87
  abstract _isFrozen?: boolean;
99
88
  }
@@ -103,18 +92,6 @@ export type EnsureObject<T> = {
103
92
  export type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';
104
93
  export type InsightExtractParam = string | Record<string, string>;
105
94
  export type ElementCacheFeature = Record<string, unknown>;
106
- export type LocateResultElement = {
107
- center: [number, number];
108
- rect: Rect;
109
- id: string;
110
- indexId?: number;
111
- xpaths: string[];
112
- attributes: {
113
- nodeType: NodeType;
114
- [key: string]: string;
115
- };
116
- isOrderSensitive?: boolean;
117
- };
118
95
  export interface LocateResult {
119
96
  element: LocateResultElement | null;
120
97
  rect?: Rect;
@@ -143,7 +120,7 @@ export interface InsightDump extends DumpMeta {
143
120
  dataDemand?: InsightExtractParam;
144
121
  assertion?: TUserPrompt;
145
122
  };
146
- matchedElement: BaseElement[];
123
+ matchedElement: LocateResultElement[];
147
124
  matchedRect?: Rect;
148
125
  deepThink?: boolean;
149
126
  data: any;
@@ -193,7 +170,6 @@ export interface AgentAssertOpt {
193
170
  *
194
171
  */
195
172
  export interface PlanningLocateParam extends DetailedLocateParam {
196
- id?: string;
197
173
  bbox?: [number, number, number, number];
198
174
  }
199
175
  export interface PlanningAction<ParamType = any> {
@@ -385,7 +361,7 @@ export interface WebElementInfo extends BaseElement {
385
361
  [key: string]: string;
386
362
  };
387
363
  }
388
- export type WebUIContext = UIContext<WebElementInfo>;
364
+ export type WebUIContext = UIContext;
389
365
  /**
390
366
  * Agent
391
367
  */
@@ -1,13 +1,12 @@
1
1
  import type { TUserPrompt } from './ai-model/common';
2
- import type { AndroidDeviceOpt, IOSDeviceOpt } from './device';
3
2
  import type { AgentOpt, Rect } from './types';
4
- import type { BaseElement, UIContext } from './types';
3
+ import type { UIContext } from './types';
5
4
  export interface LocateOption {
6
5
  prompt?: TUserPrompt;
7
6
  deepThink?: boolean;
8
7
  cacheable?: boolean;
9
8
  xpath?: string;
10
- uiContext?: UIContext<BaseElement>;
9
+ uiContext?: UIContext;
11
10
  }
12
11
  export interface InsightExtractOption {
13
12
  domIncluded?: boolean | 'visible-only';
@@ -69,11 +68,14 @@ export interface MidsceneYamlScriptWebEnv extends MidsceneYamlScriptConfig, Mids
69
68
  bridgeMode?: false | 'newTabWithUrl' | 'currentTab';
70
69
  closeNewTabsAfterDisconnect?: boolean;
71
70
  }
72
- export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig, Omit<AndroidDeviceOpt, 'customActions'> {
71
+ export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig {
73
72
  deviceId?: string;
74
73
  launch?: string;
75
74
  }
76
- export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig, Omit<IOSDeviceOpt, 'customActions'> {
75
+ export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig {
76
+ wdaPort?: number;
77
+ wdaHost?: string;
78
+ autoDismissKeyboard?: boolean;
77
79
  launch?: string;
78
80
  }
79
81
  export type MidsceneYamlScriptEnv = MidsceneYamlScriptWebEnv | MidsceneYamlScriptAndroidEnv | MidsceneYamlScriptIOSEnv;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.0.1-beta-20251024063839.0",
4
+ "version": "1.0.1-beta-20251024064637.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,8 +89,8 @@
89
89
  "zod": "3.24.3",
90
90
  "semver": "7.5.2",
91
91
  "js-yaml": "4.1.0",
92
- "@midscene/recorder": "1.0.1-beta-20251024063839.0",
93
- "@midscene/shared": "1.0.1-beta-20251024063839.0"
92
+ "@midscene/recorder": "1.0.1-beta-20251024064637.0",
93
+ "@midscene/shared": "1.0.1-beta-20251024064637.0"
94
94
  },
95
95
  "devDependencies": {
96
96
  "@rslib/core": "^0.11.2",
File without changes
@@ -1,20 +0,0 @@
1
- "use strict";
2
- var __webpack_require__ = {};
3
- (()=>{
4
- __webpack_require__.r = (exports1)=>{
5
- if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
6
- value: 'Module'
7
- });
8
- Object.defineProperty(exports1, '__esModule', {
9
- value: true
10
- });
11
- };
12
- })();
13
- var __webpack_exports__ = {};
14
- __webpack_require__.r(__webpack_exports__);
15
- for(var __webpack_i__ in __webpack_exports__)exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
16
- Object.defineProperty(exports, '__esModule', {
17
- value: true
18
- });
19
-
20
- //# sourceMappingURL=device-options.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"device/device-options.js","sources":["webpack://@midscene/core/webpack/runtime/make_namespace_object"],"sourcesContent":["// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};"],"names":["__webpack_require__","Symbol","Object"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOC,UAA0BA,OAAO,WAAW,EACrDC,OAAO,cAAc,CAAC,UAASD,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEC,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D"}
@@ -1,57 +0,0 @@
1
- import type { DeviceAction } from '../types';
2
- /**
3
- * Android device input options
4
- */
5
- export type AndroidDeviceInputOpt = {
6
- /** Automatically dismiss the keyboard after input is completed */
7
- autoDismissKeyboard?: boolean;
8
- /** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
9
- keyboardDismissStrategy?: 'esc-first' | 'back-first';
10
- };
11
- /**
12
- * Android device options
13
- */
14
- export type AndroidDeviceOpt = {
15
- /** Path to the ADB executable */
16
- androidAdbPath?: string;
17
- /** Remote ADB host address */
18
- remoteAdbHost?: string;
19
- /** Remote ADB port */
20
- remoteAdbPort?: number;
21
- /** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
22
- imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
23
- /** Display ID to use for this device */
24
- displayId?: number;
25
- /** Use physical display ID for screenshot operations */
26
- usePhysicalDisplayIdForScreenshot?: boolean;
27
- /** Use physical display ID when looking up display information */
28
- usePhysicalDisplayIdForDisplayLookup?: boolean;
29
- /** Custom device actions to register */
30
- customActions?: DeviceAction<any>[];
31
- /** Screenshot resize scale factor */
32
- screenshotResizeScale?: number;
33
- /** Always fetch screen info on each call; if false, cache the first result */
34
- alwaysRefreshScreenInfo?: boolean;
35
- } & AndroidDeviceInputOpt;
36
- /**
37
- * iOS device input options
38
- */
39
- export type IOSDeviceInputOpt = {
40
- /** Automatically dismiss the keyboard after input is completed */
41
- autoDismissKeyboard?: boolean;
42
- };
43
- /**
44
- * iOS device options
45
- */
46
- export type IOSDeviceOpt = {
47
- /** Device ID (UDID) to connect to */
48
- deviceId?: string;
49
- /** Custom device actions to register */
50
- customActions?: DeviceAction<any>[];
51
- /** WebDriverAgent port (default: 8100) */
52
- wdaPort?: number;
53
- /** WebDriverAgent host (default: 'localhost') */
54
- wdaHost?: string;
55
- /** Whether to use WebDriverAgent */
56
- useWDA?: boolean;
57
- } & IOSDeviceInputOpt;