@midscene/shared 1.5.1-beta-20260302102736.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,16 +6,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
6
6
 
7
7
  // Type signatures for AI functions:
8
8
  aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
9
- aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
10
- aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
11
- aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
12
- aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
13
- aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
9
+ aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
10
+ aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
11
+ aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
12
+ aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
13
+ aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
14
14
  aiScroll(locate: string | undefined, options: {
15
15
  direction?: 'up' | 'down' | 'left' | 'right',
16
16
  scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
17
17
  distance?: number | null,
18
- deepThink?: boolean,
18
+ deepLocate?: boolean,
19
19
  xpath?: string,
20
20
  cacheable?: boolean
21
21
  }): Promise<void>
@@ -109,7 +109,7 @@ tasks:
109
109
  3. Best Practices:
110
110
  - Group related actions into logical tasks
111
111
  - Use natural language descriptions
112
- - Add deepThink: true for complex interactions
112
+ - Add deepLocate: true for complex interactions
113
113
  - Keep task names concise but descriptive
114
114
 
115
115
  4. CRITICAL - YAML Indentation Rules:
@@ -147,33 +147,33 @@ tasks:
147
147
 
148
148
  # Tap an element described by a prompt.
149
149
  - aiTap: <prompt>
150
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
150
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
151
151
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
152
152
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
153
153
 
154
154
  # Double click an element described by a prompt.
155
155
  - aiDoubleClick: <prompt>
156
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
156
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
157
157
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
158
158
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
159
159
 
160
160
  # Hover over an element described by a prompt.
161
161
  - aiHover: <prompt>
162
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
162
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
163
163
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
164
164
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
165
165
 
166
166
  # Input text into an element described by a prompt.
167
167
  - aiInput: <final text content of the input>
168
168
  locate: <prompt>
169
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
169
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
170
170
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
171
171
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
172
172
 
173
173
  # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
174
174
  - aiKeyboardPress: <key>
175
175
  locate: <prompt>
176
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
176
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
177
177
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
178
178
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
179
179
 
@@ -183,7 +183,7 @@ tasks:
183
183
  scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
184
184
  distance: <number> # Optional, the scroll distance in pixels.
185
185
  locate: <prompt> # Optional, the element to scroll on.
186
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
186
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
187
187
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
188
188
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
189
189
 
@@ -8,7 +8,6 @@ const MIDSCENE_MCP_USE_PUPPETEER_MODE = 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
8
8
  const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
9
9
  const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
10
10
  const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
11
- const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
12
11
  const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
13
12
  const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
14
13
  const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
@@ -89,7 +88,6 @@ const BASIC_ENV_KEYS = [
89
88
  ];
90
89
  const BOOLEAN_ENV_KEYS = [
91
90
  MIDSCENE_CACHE,
92
- MIDSCENE_FORCE_DEEP_THINK,
93
91
  MIDSCENE_MCP_USE_PUPPETEER_MODE,
94
92
  MIDSCENE_MCP_ANDROID_MODE,
95
93
  MIDSCENE_LANGSMITH_DEBUG,
@@ -207,4 +205,4 @@ var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
207
205
  UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
208
206
  return UITarsModelVersion;
209
207
  }({});
210
- export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
208
+ export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
@@ -35,16 +35,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
35
35
 
36
36
  // Type signatures for AI functions:
37
37
  aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
38
- aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
39
- aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
40
- aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
41
- aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
42
- aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
38
+ aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
39
+ aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
40
+ aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
41
+ aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
42
+ aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
43
43
  aiScroll(locate: string | undefined, options: {
44
44
  direction?: 'up' | 'down' | 'left' | 'right',
45
45
  scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
46
46
  distance?: number | null,
47
- deepThink?: boolean,
47
+ deepLocate?: boolean,
48
48
  xpath?: string,
49
49
  cacheable?: boolean
50
50
  }): Promise<void>
@@ -138,7 +138,7 @@ tasks:
138
138
  3. Best Practices:
139
139
  - Group related actions into logical tasks
140
140
  - Use natural language descriptions
141
- - Add deepThink: true for complex interactions
141
+ - Add deepLocate: true for complex interactions
142
142
  - Keep task names concise but descriptive
143
143
 
144
144
  4. CRITICAL - YAML Indentation Rules:
@@ -176,33 +176,33 @@ tasks:
176
176
 
177
177
  # Tap an element described by a prompt.
178
178
  - aiTap: <prompt>
179
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
179
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
180
180
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
181
181
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
182
182
 
183
183
  # Double click an element described by a prompt.
184
184
  - aiDoubleClick: <prompt>
185
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
185
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
186
186
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
187
187
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
188
188
 
189
189
  # Hover over an element described by a prompt.
190
190
  - aiHover: <prompt>
191
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
191
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
192
192
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
193
193
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
194
194
 
195
195
  # Input text into an element described by a prompt.
196
196
  - aiInput: <final text content of the input>
197
197
  locate: <prompt>
198
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
198
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
199
199
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
200
200
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
201
201
 
202
202
  # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
203
203
  - aiKeyboardPress: <key>
204
204
  locate: <prompt>
205
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
205
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
206
206
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
207
207
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
208
208
 
@@ -212,7 +212,7 @@ tasks:
212
212
  scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
213
213
  distance: <number> # Optional, the scroll distance in pixels.
214
214
  locate: <prompt> # Optional, the element to scroll on.
215
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
215
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
216
216
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
217
217
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
218
218
 
@@ -40,7 +40,6 @@ __webpack_require__.d(__webpack_exports__, {
40
40
  MIDSCENE_DEBUG_MODE: ()=>MIDSCENE_DEBUG_MODE,
41
41
  MIDSCENE_DEBUG_MODEL_PROFILE: ()=>MIDSCENE_DEBUG_MODEL_PROFILE,
42
42
  MIDSCENE_DEBUG_MODEL_RESPONSE: ()=>MIDSCENE_DEBUG_MODEL_RESPONSE,
43
- MIDSCENE_FORCE_DEEP_THINK: ()=>MIDSCENE_FORCE_DEEP_THINK,
44
43
  MIDSCENE_INSIGHT_MODEL_API_KEY: ()=>MIDSCENE_INSIGHT_MODEL_API_KEY,
45
44
  MIDSCENE_INSIGHT_MODEL_BASE_URL: ()=>MIDSCENE_INSIGHT_MODEL_BASE_URL,
46
45
  MIDSCENE_INSIGHT_MODEL_FAMILY: ()=>MIDSCENE_INSIGHT_MODEL_FAMILY,
@@ -126,7 +125,6 @@ const MIDSCENE_MCP_USE_PUPPETEER_MODE = 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
126
125
  const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
127
126
  const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
128
127
  const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
129
- const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
130
128
  const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
131
129
  const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
132
130
  const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
@@ -207,7 +205,6 @@ const BASIC_ENV_KEYS = [
207
205
  ];
208
206
  const BOOLEAN_ENV_KEYS = [
209
207
  MIDSCENE_CACHE,
210
- MIDSCENE_FORCE_DEEP_THINK,
211
208
  MIDSCENE_MCP_USE_PUPPETEER_MODE,
212
209
  MIDSCENE_MCP_ANDROID_MODE,
213
210
  MIDSCENE_LANGSMITH_DEBUG,
@@ -341,7 +338,6 @@ exports.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = __webpack_exports__.MIDSCENE_DAN
341
338
  exports.MIDSCENE_DEBUG_MODE = __webpack_exports__.MIDSCENE_DEBUG_MODE;
342
339
  exports.MIDSCENE_DEBUG_MODEL_PROFILE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_PROFILE;
343
340
  exports.MIDSCENE_DEBUG_MODEL_RESPONSE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_RESPONSE;
344
- exports.MIDSCENE_FORCE_DEEP_THINK = __webpack_exports__.MIDSCENE_FORCE_DEEP_THINK;
345
341
  exports.MIDSCENE_INSIGHT_MODEL_API_KEY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_API_KEY;
346
342
  exports.MIDSCENE_INSIGHT_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_BASE_URL;
347
343
  exports.MIDSCENE_INSIGHT_MODEL_FAMILY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_FAMILY;
@@ -433,7 +429,6 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
433
429
  "MIDSCENE_DEBUG_MODE",
434
430
  "MIDSCENE_DEBUG_MODEL_PROFILE",
435
431
  "MIDSCENE_DEBUG_MODEL_RESPONSE",
436
- "MIDSCENE_FORCE_DEEP_THINK",
437
432
  "MIDSCENE_INSIGHT_MODEL_API_KEY",
438
433
  "MIDSCENE_INSIGHT_MODEL_BASE_URL",
439
434
  "MIDSCENE_INSIGHT_MODEL_FAMILY",
@@ -1,2 +1,2 @@
1
- export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>\naiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n direction?: 'up' | 'down' | 'left' | 'right',\n scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n distance?: number | null,\n deepThink?: boolean,\n xpath?: string,\n cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from '@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('https://www.xxx.com/');\n await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n aiAct,\n aiInput,\n aiAssert,\n aiQuery,\n aiKeyboardPress,\n aiHover,\n aiTap,\n aiWaitFor,\n agentForPage,\n page,\n}) => {\n // login\n await aiAssert('The page shows the login interface');\n await aiInput('user_name', 'in user name input');\n await aiInput('password', 'in password input');\n await aiKeyboardPress('Enter', 'Login Button');\n\n // check the login success\n await aiWaitFor('The page shows that the loading is complete');\n await aiAssert('The current page shows the product detail page');\n\n // check the product info\n const dataA = await aiQuery({\n userInfo: 'User information in the format {name: string}',\n theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n });\n expect(dataA.theFirstProductInfo.name).toBe('xxx');\n expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n // add to cart\n await aiTap('click add to cart button');\n \n await aiTap('click right top cart icon');\n await aiAssert('The cart icon shows the number 1');\n});\n";
2
- export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType: scrollToBottom/scrollToTop/singleAction\n - aiAssert: \"expected state\"\n - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepThink: true for complex interactions\n- Keep task names concise but descriptive\n\n4. CRITICAL - YAML Indentation Rules:\n- For actions with additional parameters (aiScroll, aiInput, aiKeyboardPress), the parameters must be SIBLING keys, NOT nested children\n- Parameters like direction, scrollType, locate must align with the action key, not indented further\n- CORRECT indentation example:\n - aiScroll:\n direction: down\n scrollType: singleAction\n- WRONG indentation (DO NOT do this):\n - aiScroll:\n direction: down\n scrollType: singleAction\n\n\n\nYAML type\ntasks:\n - name: <name>\n continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n flow:\n # Auto Planning (.ai)\n # ----------------\n\n # Perform an interaction. `ai` is a shorthand for `aiAct`.\n - ai: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # This usage is the same as `ai`.\n - aiAct: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n # ----------------\n\n # Tap an element described by a prompt.\n - aiTap: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Double click an element described by a prompt.\n - aiDoubleClick: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Hover over an element described by a prompt.\n - aiHover: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Input text into an element described by a prompt.\n - aiInput: <final text content of the input>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n - aiKeyboardPress: <key>\n locate: <prompt>\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Scroll globally or on an element described by a prompt.\n - aiScroll:\n direction: 'up' # or 'down' | 'left' | 'right'\n scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'\n distance: <number> # Optional, the scroll distance in pixels.\n locate: <prompt> # Optional, the element to scroll on.\n deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Record the current screenshot with a description in the report file.\n - recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.\n content: <content> # Optional, the description of the screenshot.\n\n # Data Extraction\n # ----------------\n\n # Perform a query that returns a JSON object.\n - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n name: <name> # The key for the query result in the JSON output.\n\n # More APIs\n # ----------------\n\n # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n - aiWaitFor: <prompt>\n timeout: <ms>\n\n # Perform an assertion.\n - aiAssert: <prompt>\n errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n # Wait for a specified amount of time.\n - sleep: <ms>\n\n # Execute a piece of JavaScript code in the web page context.\n - javascript: <javascript>\n name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n - name: <name>\n flow:\n # ...\n";
1
+ export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>\naiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n direction?: 'up' | 'down' | 'left' | 'right',\n scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n distance?: number | null,\n deepLocate?: boolean,\n xpath?: string,\n cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from '@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('https://www.xxx.com/');\n await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n aiAct,\n aiInput,\n aiAssert,\n aiQuery,\n aiKeyboardPress,\n aiHover,\n aiTap,\n aiWaitFor,\n agentForPage,\n page,\n}) => {\n // login\n await aiAssert('The page shows the login interface');\n await aiInput('user_name', 'in user name input');\n await aiInput('password', 'in password input');\n await aiKeyboardPress('Enter', 'Login Button');\n\n // check the login success\n await aiWaitFor('The page shows that the loading is complete');\n await aiAssert('The current page shows the product detail page');\n\n // check the product info\n const dataA = await aiQuery({\n userInfo: 'User information in the format {name: string}',\n theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n });\n expect(dataA.theFirstProductInfo.name).toBe('xxx');\n expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n // add to cart\n await aiTap('click add to cart button');\n \n await aiTap('click right top cart icon');\n await aiAssert('The cart icon shows the number 1');\n});\n";
2
+ export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType: scrollToBottom/scrollToTop/singleAction\n - aiAssert: \"expected state\"\n - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepLocate: true for complex interactions\n- Keep task names concise but descriptive\n\n4. CRITICAL - YAML Indentation Rules:\n- For actions with additional parameters (aiScroll, aiInput, aiKeyboardPress), the parameters must be SIBLING keys, NOT nested children\n- Parameters like direction, scrollType, locate must align with the action key, not indented further\n- CORRECT indentation example:\n - aiScroll:\n direction: down\n scrollType: singleAction\n- WRONG indentation (DO NOT do this):\n - aiScroll:\n direction: down\n scrollType: singleAction\n\n\n\nYAML type\ntasks:\n - name: <name>\n continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n flow:\n # Auto Planning (.ai)\n # ----------------\n\n # Perform an interaction. `ai` is a shorthand for `aiAct`.\n - ai: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # This usage is the same as `ai`.\n - aiAct: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n # ----------------\n\n # Tap an element described by a prompt.\n - aiTap: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Double click an element described by a prompt.\n - aiDoubleClick: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Hover over an element described by a prompt.\n - aiHover: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Input text into an element described by a prompt.\n - aiInput: <final text content of the input>\n locate: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n - aiKeyboardPress: <key>\n locate: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Scroll globally or on an element described by a prompt.\n - aiScroll:\n direction: 'up' # or 'down' | 'left' | 'right'\n scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'\n distance: <number> # Optional, the scroll distance in pixels.\n locate: <prompt> # Optional, the element to scroll on.\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Record the current screenshot with a description in the report file.\n - recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.\n content: <content> # Optional, the description of the screenshot.\n\n # Data Extraction\n # ----------------\n\n # Perform a query that returns a JSON object.\n - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n name: <name> # The key for the query result in the JSON output.\n\n # More APIs\n # ----------------\n\n # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n - aiWaitFor: <prompt>\n timeout: <ms>\n\n # Perform an assertion.\n - aiAssert: <prompt>\n errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n # Wait for a specified amount of time.\n - sleep: <ms>\n\n # Execute a piece of JavaScript code in the web page context.\n - javascript: <javascript>\n name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n - name: <name>\n flow:\n # ...\n";
@@ -8,7 +8,6 @@ export declare const MIDSCENE_MCP_USE_PUPPETEER_MODE = "MIDSCENE_MCP_USE_PUPPETE
8
8
  export declare const MIDSCENE_MCP_CHROME_PATH = "MIDSCENE_MCP_CHROME_PATH";
9
9
  export declare const MIDSCENE_MCP_ANDROID_MODE = "MIDSCENE_MCP_ANDROID_MODE";
10
10
  export declare const DOCKER_CONTAINER = "DOCKER_CONTAINER";
11
- export declare const MIDSCENE_FORCE_DEEP_THINK = "MIDSCENE_FORCE_DEEP_THINK";
12
11
  export declare const MIDSCENE_LANGSMITH_DEBUG = "MIDSCENE_LANGSMITH_DEBUG";
13
12
  export declare const MIDSCENE_LANGFUSE_DEBUG = "MIDSCENE_LANGFUSE_DEBUG";
14
13
  export declare const MIDSCENE_MODEL_SOCKS_PROXY = "MIDSCENE_MODEL_SOCKS_PROXY";
@@ -105,7 +104,7 @@ export declare const UNUSED_ENV_KEYS: string[];
105
104
  * can not be override by overrideAIConfig
106
105
  */
107
106
  export declare const BASIC_ENV_KEYS: readonly ["MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR"];
108
- export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
107
+ export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
109
108
  export declare const NUMBER_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT"];
110
109
  export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
111
110
  /**
@@ -113,14 +112,14 @@ export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "OP
113
112
  * Can not be override by agent.modelConfig but can be override by overrideAIConfig
114
113
  * Can be access at any time
115
114
  */
116
- export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
115
+ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
117
116
  /**
118
117
  * Model related eve keys, used for declare which model to use.
119
118
  * Can be override by both agent.modelConfig and overrideAIConfig
120
119
  * Can only be access after agent.constructor
121
120
  */
122
121
  export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
123
- export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
122
+ export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
124
123
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
125
124
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
126
125
  export type TVlModeValues = 'qwen2.5-vl' | 'qwen3-vl' | 'qwen3.5' | 'doubao-vision' | 'doubao-seed' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5' | 'glm-v' | 'auto-glm' | 'auto-glm-multilingual';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.5.1-beta-20260302102736.0",
3
+ "version": "1.5.1",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -6,16 +6,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
6
6
 
7
7
  // Type signatures for AI functions:
8
8
  aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
9
- aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
10
- aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
11
- aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
12
- aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
13
- aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
9
+ aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
10
+ aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
11
+ aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
12
+ aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
13
+ aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
14
14
  aiScroll(locate: string | undefined, options: {
15
15
  direction?: 'up' | 'down' | 'left' | 'right',
16
16
  scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
17
17
  distance?: number | null,
18
- deepThink?: boolean,
18
+ deepLocate?: boolean,
19
19
  xpath?: string,
20
20
  cacheable?: boolean
21
21
  }): Promise<void>
@@ -110,7 +110,7 @@ tasks:
110
110
  3. Best Practices:
111
111
  - Group related actions into logical tasks
112
112
  - Use natural language descriptions
113
- - Add deepThink: true for complex interactions
113
+ - Add deepLocate: true for complex interactions
114
114
  - Keep task names concise but descriptive
115
115
 
116
116
  4. CRITICAL - YAML Indentation Rules:
@@ -148,33 +148,33 @@ tasks:
148
148
 
149
149
  # Tap an element described by a prompt.
150
150
  - aiTap: <prompt>
151
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
151
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
152
152
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
153
153
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
154
154
 
155
155
  # Double click an element described by a prompt.
156
156
  - aiDoubleClick: <prompt>
157
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
157
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
158
158
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
159
159
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
160
160
 
161
161
  # Hover over an element described by a prompt.
162
162
  - aiHover: <prompt>
163
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
163
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
164
164
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
165
165
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
166
166
 
167
167
  # Input text into an element described by a prompt.
168
168
  - aiInput: <final text content of the input>
169
169
  locate: <prompt>
170
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
170
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
171
171
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
172
172
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
173
173
 
174
174
  # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
175
175
  - aiKeyboardPress: <key>
176
176
  locate: <prompt>
177
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
177
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
178
178
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
179
179
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
180
180
 
@@ -184,7 +184,7 @@ tasks:
184
184
  scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
185
185
  distance: <number> # Optional, the scroll distance in pixels.
186
186
  locate: <prompt> # Optional, the element to scroll on.
187
- deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
187
+ deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
188
188
  xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
189
189
  cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
190
190
 
package/src/env/types.ts CHANGED
@@ -12,7 +12,6 @@ export const MIDSCENE_MCP_USE_PUPPETEER_MODE =
12
12
  export const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
13
13
  export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
14
14
  export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
15
- export const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
16
15
 
17
16
  // Observability
18
17
  export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
@@ -164,7 +163,6 @@ export const BASIC_ENV_KEYS = [
164
163
 
165
164
  export const BOOLEAN_ENV_KEYS = [
166
165
  MIDSCENE_CACHE,
167
- MIDSCENE_FORCE_DEEP_THINK,
168
166
  MIDSCENE_MCP_USE_PUPPETEER_MODE,
169
167
  MIDSCENE_MCP_ANDROID_MODE,
170
168
  MIDSCENE_LANGSMITH_DEBUG,