@midscene/core 1.0.1-beta-20251024064637.0 → 1.0.1-beta-20251027033034.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,7 +118,7 @@ export declare class Agent<InterfaceType extends AbstractInterface = AbstractInt
118
118
  deepThink?: boolean;
119
119
  } & LocatorValidatorOption): Promise<AgentDescribeElementAtPointResult>;
120
120
  verifyLocator(prompt: string, locateOpt: LocateOption | undefined, expectCenter: [number, number], verifyLocateOption?: LocatorValidatorOption): Promise<LocateValidatorResult>;
121
- aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "center" | "rect"> & {
121
+ aiLocate(prompt: TUserPrompt, opt?: LocateOption): Promise<Pick<LocateResultElement, "rect" | "center"> & {
122
122
  dpr?: number;
123
123
  }>;
124
124
  aiAssert(assertion: TUserPrompt, msg?: string, opt?: AgentAssertOpt & InsightExtractOption): Promise<{
@@ -0,0 +1,57 @@
1
+ import type { DeviceAction } from '../types';
2
+ /**
3
+ * Android device input options
4
+ */
5
+ export type AndroidDeviceInputOpt = {
6
+ /** Automatically dismiss the keyboard after input is completed */
7
+ autoDismissKeyboard?: boolean;
8
+ /** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
9
+ keyboardDismissStrategy?: 'esc-first' | 'back-first';
10
+ };
11
+ /**
12
+ * Android device options
13
+ */
14
+ export type AndroidDeviceOpt = {
15
+ /** Path to the ADB executable */
16
+ androidAdbPath?: string;
17
+ /** Remote ADB host address */
18
+ remoteAdbHost?: string;
19
+ /** Remote ADB port */
20
+ remoteAdbPort?: number;
21
+ /** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
22
+ imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
23
+ /** Display ID to use for this device */
24
+ displayId?: number;
25
+ /** Use physical display ID for screenshot operations */
26
+ usePhysicalDisplayIdForScreenshot?: boolean;
27
+ /** Use physical display ID when looking up display information */
28
+ usePhysicalDisplayIdForDisplayLookup?: boolean;
29
+ /** Custom device actions to register */
30
+ customActions?: DeviceAction<any>[];
31
+ /** Screenshot resize scale factor */
32
+ screenshotResizeScale?: number;
33
+ /** Always fetch screen info on each call; if false, cache the first result */
34
+ alwaysRefreshScreenInfo?: boolean;
35
+ } & AndroidDeviceInputOpt;
36
+ /**
37
+ * iOS device input options
38
+ */
39
+ export type IOSDeviceInputOpt = {
40
+ /** Automatically dismiss the keyboard after input is completed */
41
+ autoDismissKeyboard?: boolean;
42
+ };
43
+ /**
44
+ * iOS device options
45
+ */
46
+ export type IOSDeviceOpt = {
47
+ /** Device ID (UDID) to connect to */
48
+ deviceId?: string;
49
+ /** Custom device actions to register */
50
+ customActions?: DeviceAction<any>[];
51
+ /** WebDriverAgent port (default: 8100) */
52
+ wdaPort?: number;
53
+ /** WebDriverAgent host (default: 'localhost') */
54
+ wdaHost?: string;
55
+ /** Whether to use WebDriverAgent */
56
+ useWDA?: boolean;
57
+ } & IOSDeviceInputOpt;
@@ -1106,7 +1106,7 @@ export declare const actionScrollParamSchema: z.ZodObject<{
1106
1106
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
1107
1107
  }, z.ZodTypeAny, "passthrough">>>;
1108
1108
  }, "strip", z.ZodTypeAny, {
1109
- direction: "down" | "up" | "right" | "left";
1109
+ direction: "up" | "down" | "right" | "left";
1110
1110
  scrollType: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft";
1111
1111
  locate?: z.objectOutputType<{
1112
1112
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
@@ -1182,7 +1182,7 @@ export declare const actionScrollParamSchema: z.ZodObject<{
1182
1182
  cacheable: z.ZodOptional<z.ZodBoolean>;
1183
1183
  xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
1184
1184
  }, z.ZodTypeAny, "passthrough"> | undefined;
1185
- direction?: "down" | "up" | "right" | "left" | undefined;
1185
+ direction?: "up" | "down" | "right" | "left" | undefined;
1186
1186
  scrollType?: "once" | "untilBottom" | "untilTop" | "untilRight" | "untilLeft" | undefined;
1187
1187
  distance?: number | null | undefined;
1188
1188
  }>;
@@ -1848,7 +1848,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
1848
1848
  }, "strip", z.ZodTypeAny, {
1849
1849
  duration: number;
1850
1850
  repeat?: number | undefined;
1851
- direction?: "down" | "up" | "right" | "left" | undefined;
1851
+ direction?: "up" | "down" | "right" | "left" | undefined;
1852
1852
  distance?: number | undefined;
1853
1853
  start?: z.objectOutputType<{
1854
1854
  prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
@@ -1924,7 +1924,7 @@ export declare const ActionSwipeParamSchema: z.ZodObject<{
1924
1924
  }, z.ZodTypeAny, "passthrough"> | undefined;
1925
1925
  }, {
1926
1926
  repeat?: number | undefined;
1927
- direction?: "down" | "up" | "right" | "left" | undefined;
1927
+ direction?: "up" | "down" | "right" | "left" | undefined;
1928
1928
  distance?: number | undefined;
1929
1929
  duration?: number | undefined;
1930
1930
  start?: z.objectInputType<{
@@ -2156,3 +2156,4 @@ export type ActionClearInputParam = {
2156
2156
  };
2157
2157
  export declare const defineActionClearInput: (call: (param: ActionClearInputParam) => Promise<void>) => DeviceAction<ActionClearInputParam>;
2158
2158
  export type { DeviceAction } from '../types';
2159
+ export type { AndroidDeviceOpt, AndroidDeviceInputOpt, IOSDeviceOpt, IOSDeviceInputOpt, } from './device-options';
@@ -397,14 +397,9 @@ export interface AgentOpt {
397
397
  *
398
398
  * @example
399
399
  * ```typescript
400
- * createOpenAIClient: (config) => {
401
- * const openai = new OpenAI({
402
- * apiKey: config.openaiApiKey,
403
- * baseURL: config.openaiBaseURL,
404
- * });
405
- *
400
+ * createOpenAIClient: async (openai, opts) => {
406
401
  * // Wrap with langsmith for planning tasks
407
- * if (config.intent === 'planning') {
402
+ * if (opts.baseURL?.includes('planning')) {
408
403
  * return wrapOpenAI(openai, { metadata: { task: 'planning' } });
409
404
  * }
410
405
  *
@@ -1,4 +1,5 @@
1
1
  import type { TUserPrompt } from './ai-model/common';
2
+ import type { AndroidDeviceOpt, IOSDeviceOpt } from './device';
2
3
  import type { AgentOpt, Rect } from './types';
3
4
  import type { UIContext } from './types';
4
5
  export interface LocateOption {
@@ -68,14 +69,11 @@ export interface MidsceneYamlScriptWebEnv extends MidsceneYamlScriptConfig, Mids
68
69
  bridgeMode?: false | 'newTabWithUrl' | 'currentTab';
69
70
  closeNewTabsAfterDisconnect?: boolean;
70
71
  }
71
- export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig {
72
+ export interface MidsceneYamlScriptAndroidEnv extends MidsceneYamlScriptConfig, Omit<AndroidDeviceOpt, 'customActions'> {
72
73
  deviceId?: string;
73
74
  launch?: string;
74
75
  }
75
- export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig {
76
- wdaPort?: number;
77
- wdaHost?: string;
78
- autoDismissKeyboard?: boolean;
76
+ export interface MidsceneYamlScriptIOSEnv extends MidsceneYamlScriptConfig, Omit<IOSDeviceOpt, 'customActions'> {
79
77
  launch?: string;
80
78
  }
81
79
  export type MidsceneYamlScriptEnv = MidsceneYamlScriptWebEnv | MidsceneYamlScriptAndroidEnv | MidsceneYamlScriptIOSEnv;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "1.0.1-beta-20251024064637.0",
4
+ "version": "1.0.1-beta-20251027033034.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "main": "./dist/lib/index.js",
@@ -89,8 +89,8 @@
89
89
  "zod": "3.24.3",
90
90
  "semver": "7.5.2",
91
91
  "js-yaml": "4.1.0",
92
- "@midscene/recorder": "1.0.1-beta-20251024064637.0",
93
- "@midscene/shared": "1.0.1-beta-20251024064637.0"
92
+ "@midscene/recorder": "1.0.1-beta-20251027033034.0",
93
+ "@midscene/shared": "1.0.1-beta-20251027033034.0"
94
94
  },
95
95
  "devDependencies": {
96
96
  "@rslib/core": "^0.11.2",