npm - @midscene/shared - Versions diffs - 1.5.1-beta-20260302102736.0 → 1.5.1 - Mend

@midscene/shared 1.5.1-beta-20260302102736.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/es/constants/example-code.mjs +13 -13
package/dist/es/env/types.mjs +1 -3
package/dist/lib/constants/example-code.js +13 -13
package/dist/lib/env/types.js +0 -5
package/dist/types/constants/example-code.d.ts +2 -2
package/dist/types/env/types.d.ts +3 -4
package/package.json +1 -1
package/src/constants/example-code.ts +13 -13
package/src/env/types.ts +0 -2

package/dist/es/constants/example-code.mjs CHANGED Viewed

@@ -6,16 +6,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
 // Type signatures for AI functions:
 aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
-aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
 aiScroll(locate: string | undefined, options: {
   direction?: 'up' | 'down' | 'left' | 'right',
   scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
   distance?: number | null,
-  deepThink?: boolean,
+  deepLocate?: boolean,
   xpath?: string,
   cacheable?: boolean
 }): Promise<void>
@@ -109,7 +109,7 @@ tasks:
 3. Best Practices:
 - Group related actions into logical tasks
 - Use natural language descriptions
-- Add deepThink: true for complex interactions
+- Add deepLocate: true for complex interactions
 - Keep task names concise but descriptive
 4. CRITICAL - YAML Indentation Rules:
@@ -147,33 +147,33 @@ tasks:
       # Tap an element described by a prompt.
       - aiTap: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Double click an element described by a prompt.
       - aiDoubleClick: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Hover over an element described by a prompt.
       - aiHover: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Input text into an element described by a prompt.
       - aiInput: <final text content of the input>
         locate: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
       - aiKeyboardPress: <key>
         locate: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
@@ -183,7 +183,7 @@ tasks:
         scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
         distance: <number> # Optional, the scroll distance in pixels.
         locate: <prompt> # Optional, the element to scroll on.
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.

package/dist/es/env/types.mjs CHANGED Viewed

@@ -8,7 +8,6 @@ const MIDSCENE_MCP_USE_PUPPETEER_MODE = 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
 const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
 const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
 const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
-const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
 const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
 const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
 const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
@@ -89,7 +88,6 @@ const BASIC_ENV_KEYS = [
 ];
 const BOOLEAN_ENV_KEYS = [
     MIDSCENE_CACHE,
-    MIDSCENE_FORCE_DEEP_THINK,
     MIDSCENE_MCP_USE_PUPPETEER_MODE,
     MIDSCENE_MCP_ANDROID_MODE,
     MIDSCENE_LANGSMITH_DEBUG,
@@ -207,4 +205,4 @@ var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
     UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
     return UITarsModelVersion;
 }({});
-export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_FORCE_DEEP_THINK, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, 
MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
+export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, 
MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };

package/dist/lib/constants/example-code.js CHANGED Viewed

@@ -35,16 +35,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
 // Type signatures for AI functions:
 aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
-aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
 aiScroll(locate: string | undefined, options: {
   direction?: 'up' | 'down' | 'left' | 'right',
   scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
   distance?: number | null,
-  deepThink?: boolean,
+  deepLocate?: boolean,
   xpath?: string,
   cacheable?: boolean
 }): Promise<void>
@@ -138,7 +138,7 @@ tasks:
 3. Best Practices:
 - Group related actions into logical tasks
 - Use natural language descriptions
-- Add deepThink: true for complex interactions
+- Add deepLocate: true for complex interactions
 - Keep task names concise but descriptive
 4. CRITICAL - YAML Indentation Rules:
@@ -176,33 +176,33 @@ tasks:
       # Tap an element described by a prompt.
       - aiTap: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Double click an element described by a prompt.
       - aiDoubleClick: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Hover over an element described by a prompt.
       - aiHover: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Input text into an element described by a prompt.
       - aiInput: <final text content of the input>
         locate: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
       - aiKeyboardPress: <key>
         locate: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
@@ -212,7 +212,7 @@ tasks:
         scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
         distance: <number> # Optional, the scroll distance in pixels.
         locate: <prompt> # Optional, the element to scroll on.
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.

package/dist/lib/env/types.js CHANGED Viewed

@@ -40,7 +40,6 @@ __webpack_require__.d(__webpack_exports__, {
     MIDSCENE_DEBUG_MODE: ()=>MIDSCENE_DEBUG_MODE,
     MIDSCENE_DEBUG_MODEL_PROFILE: ()=>MIDSCENE_DEBUG_MODEL_PROFILE,
     MIDSCENE_DEBUG_MODEL_RESPONSE: ()=>MIDSCENE_DEBUG_MODEL_RESPONSE,
-    MIDSCENE_FORCE_DEEP_THINK: ()=>MIDSCENE_FORCE_DEEP_THINK,
     MIDSCENE_INSIGHT_MODEL_API_KEY: ()=>MIDSCENE_INSIGHT_MODEL_API_KEY,
     MIDSCENE_INSIGHT_MODEL_BASE_URL: ()=>MIDSCENE_INSIGHT_MODEL_BASE_URL,
     MIDSCENE_INSIGHT_MODEL_FAMILY: ()=>MIDSCENE_INSIGHT_MODEL_FAMILY,
@@ -126,7 +125,6 @@ const MIDSCENE_MCP_USE_PUPPETEER_MODE = 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
 const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
 const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
 const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
-const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
 const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
 const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
 const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
@@ -207,7 +205,6 @@ const BASIC_ENV_KEYS = [
 ];
 const BOOLEAN_ENV_KEYS = [
     MIDSCENE_CACHE,
-    MIDSCENE_FORCE_DEEP_THINK,
     MIDSCENE_MCP_USE_PUPPETEER_MODE,
     MIDSCENE_MCP_ANDROID_MODE,
     MIDSCENE_LANGSMITH_DEBUG,
@@ -341,7 +338,6 @@ exports.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = __webpack_exports__.MIDSCENE_DAN
 exports.MIDSCENE_DEBUG_MODE = __webpack_exports__.MIDSCENE_DEBUG_MODE;
 exports.MIDSCENE_DEBUG_MODEL_PROFILE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_PROFILE;
 exports.MIDSCENE_DEBUG_MODEL_RESPONSE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_RESPONSE;
-exports.MIDSCENE_FORCE_DEEP_THINK = __webpack_exports__.MIDSCENE_FORCE_DEEP_THINK;
 exports.MIDSCENE_INSIGHT_MODEL_API_KEY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_API_KEY;
 exports.MIDSCENE_INSIGHT_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_BASE_URL;
 exports.MIDSCENE_INSIGHT_MODEL_FAMILY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_FAMILY;
@@ -433,7 +429,6 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
     "MIDSCENE_DEBUG_MODE",
     "MIDSCENE_DEBUG_MODEL_PROFILE",
     "MIDSCENE_DEBUG_MODEL_RESPONSE",
-    "MIDSCENE_FORCE_DEEP_THINK",
     "MIDSCENE_INSIGHT_MODEL_API_KEY",
     "MIDSCENE_INSIGHT_MODEL_BASE_URL",
     "MIDSCENE_INSIGHT_MODEL_FAMILY",

package/dist/types/constants/example-code.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>\naiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n  direction?: 'up' | 'down' | 'left' | 'right',\n  scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n  distance?: number | null,\n  deepThink?: boolean,\n  xpath?: string,\n  cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from 
'@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n  waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n  await page.goto('https://www.xxx.com/');\n  await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n  aiAct,\n  aiInput,\n  aiAssert,\n  aiQuery,\n  aiKeyboardPress,\n  aiHover,\n  aiTap,\n  aiWaitFor,\n  agentForPage,\n  page,\n}) => {\n  // login\n  await aiAssert('The page shows the login interface');\n  await aiInput('user_name', 'in user name input');\n  await aiInput('password', 'in password input');\n  await aiKeyboardPress('Enter', 'Login Button');\n\n  // check the login success\n  await aiWaitFor('The page shows that the loading is complete');\n  await aiAssert('The current page shows the product detail page');\n\n  // check the product info\n  const dataA = await aiQuery({\n    userInfo: 'User information in the format {name: string}',\n    theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n  });\n  expect(dataA.theFirstProductInfo.name).toBe('xxx');\n  expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n  // add to cart\n  await aiTap('click add to cart button');\n  \n  await aiTap('click right top cart icon');\n  await aiAssert('The cart icon shows the number 1');\n});\n";
-export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n  url: \"starting_url\"\n  viewportWidth: 1280\n  viewportHeight: 960\n\ntasks:\n  - name: \"descriptive task name\"\n    flow:\n      - aiTap: \"element description\"\n      - aiInput: 'text value'\n        locate: 'input field description'\n      - aiScroll:\n        direction: down/up\n        scrollType: scrollToBottom/scrollToTop/singleAction\n      - aiAssert: \"expected state\"\n      - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepThink: true for complex interactions\n- Keep task names concise but descriptive\n\n4. CRITICAL - YAML Indentation Rules:\n- For actions with additional parameters (aiScroll, aiInput, aiKeyboardPress), the parameters must be SIBLING keys, NOT nested children\n- Parameters like direction, scrollType, locate must align with the action key, not indented further\n- CORRECT indentation example:\n      - aiScroll:\n        direction: down\n        scrollType: singleAction\n- WRONG indentation (DO NOT do this):\n      - aiScroll:\n          direction: down\n          scrollType: singleAction\n\n\n\nYAML type\ntasks:\n  - name: <name>\n    continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n    flow:\n      # Auto Planning (.ai)\n      # ----------------\n\n      # Perform an interaction. `ai` is a shorthand for `aiAct`.\n      - ai: <prompt>\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n      # This usage is the same as `ai`.\n      - aiAct: <prompt>\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n      # ----------------\n\n      # Tap an element described by a prompt.\n      - aiTap: <prompt>\n        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Double click an element described by a prompt.\n      - aiDoubleClick: <prompt>\n        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Hover over an element described by a prompt.\n      - aiHover: <prompt>\n        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. 
Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Input text into an element described by a prompt.\n      - aiInput: <final text content of the input>\n        locate: <prompt>\n        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n      - aiKeyboardPress: <key>\n        locate: <prompt>\n        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Scroll globally or on an element described by a prompt.\n      - aiScroll:\n        direction: 'up' # or 'down' | 'left' | 'right'\n        scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'\n        distance: <number> # Optional, the scroll distance in pixels.\n        locate: <prompt> # Optional, the element to scroll on.\n        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. 
Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Record the current screenshot with a description in the report file.\n      - recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.\n        content: <content> # Optional, the description of the screenshot.\n\n      # Data Extraction\n      # ----------------\n\n      # Perform a query that returns a JSON object.\n      - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n        name: <name> # The key for the query result in the JSON output.\n\n      # More APIs\n      # ----------------\n\n      # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n      - aiWaitFor: <prompt>\n        timeout: <ms>\n\n      # Perform an assertion.\n      - aiAssert: <prompt>\n        errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n      # Wait for a specified amount of time.\n      - sleep: <ms>\n\n      # Execute a piece of JavaScript code in the web page context.\n      - javascript: <javascript>\n        name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n  - name: <name>\n    flow:\n      # ...\n";
+export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>\naiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n  direction?: 'up' | 'down' | 'left' | 'right',\n  scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n  distance?: number | null,\n  deepLocate?: boolean,\n  xpath?: string,\n  cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from 
'@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n  waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n  await page.goto('https://www.xxx.com/');\n  await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n  aiAct,\n  aiInput,\n  aiAssert,\n  aiQuery,\n  aiKeyboardPress,\n  aiHover,\n  aiTap,\n  aiWaitFor,\n  agentForPage,\n  page,\n}) => {\n  // login\n  await aiAssert('The page shows the login interface');\n  await aiInput('user_name', 'in user name input');\n  await aiInput('password', 'in password input');\n  await aiKeyboardPress('Enter', 'Login Button');\n\n  // check the login success\n  await aiWaitFor('The page shows that the loading is complete');\n  await aiAssert('The current page shows the product detail page');\n\n  // check the product info\n  const dataA = await aiQuery({\n    userInfo: 'User information in the format {name: string}',\n    theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n  });\n  expect(dataA.theFirstProductInfo.name).toBe('xxx');\n  expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n  // add to cart\n  await aiTap('click add to cart button');\n  \n  await aiTap('click right top cart icon');\n  await aiAssert('The cart icon shows the number 1');\n});\n";
+export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n  url: \"starting_url\"\n  viewportWidth: 1280\n  viewportHeight: 960\n\ntasks:\n  - name: \"descriptive task name\"\n    flow:\n      - aiTap: \"element description\"\n      - aiInput: 'text value'\n        locate: 'input field description'\n      - aiScroll:\n        direction: down/up\n        scrollType: scrollToBottom/scrollToTop/singleAction\n      - aiAssert: \"expected state\"\n      - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepLocate: true for complex interactions\n- Keep task names concise but descriptive\n\n4. CRITICAL - YAML Indentation Rules:\n- For actions with additional parameters (aiScroll, aiInput, aiKeyboardPress), the parameters must be SIBLING keys, NOT nested children\n- Parameters like direction, scrollType, locate must align with the action key, not indented further\n- CORRECT indentation example:\n      - aiScroll:\n        direction: down\n        scrollType: singleAction\n- WRONG indentation (DO NOT do this):\n      - aiScroll:\n          direction: down\n          scrollType: singleAction\n\n\n\nYAML type\ntasks:\n  - name: <name>\n    continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n    flow:\n      # Auto Planning (.ai)\n      # ----------------\n\n      # Perform an interaction. `ai` is a shorthand for `aiAct`.\n      - ai: <prompt>\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n      # This usage is the same as `ai`.\n      - aiAct: <prompt>\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n      # ----------------\n\n      # Tap an element described by a prompt.\n      - aiTap: <prompt>\n        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Double click an element described by a prompt.\n      - aiDoubleClick: <prompt>\n        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Hover over an element described by a prompt.\n      - aiHover: <prompt>\n        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. 
Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Input text into an element described by a prompt.\n      - aiInput: <final text content of the input>\n        locate: <prompt>\n        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n      - aiKeyboardPress: <key>\n        locate: <prompt>\n        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Scroll globally or on an element described by a prompt.\n      - aiScroll:\n        direction: 'up' # or 'down' | 'left' | 'right'\n        scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'\n        distance: <number> # Optional, the scroll distance in pixels.\n        locate: <prompt> # Optional, the element to scroll on.\n        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. 
Defaults to False.\n        xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n        cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n      # Record the current screenshot with a description in the report file.\n      - recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.\n        content: <content> # Optional, the description of the screenshot.\n\n      # Data Extraction\n      # ----------------\n\n      # Perform a query that returns a JSON object.\n      - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n        name: <name> # The key for the query result in the JSON output.\n\n      # More APIs\n      # ----------------\n\n      # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n      - aiWaitFor: <prompt>\n        timeout: <ms>\n\n      # Perform an assertion.\n      - aiAssert: <prompt>\n        errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n      # Wait for a specified amount of time.\n      - sleep: <ms>\n\n      # Execute a piece of JavaScript code in the web page context.\n      - javascript: <javascript>\n        name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n  - name: <name>\n    flow:\n      # ...\n";

package/dist/types/env/types.d.ts CHANGED Viewed

@@ -8,7 +8,6 @@ export declare const MIDSCENE_MCP_USE_PUPPETEER_MODE = "MIDSCENE_MCP_USE_PUPPETE
 export declare const MIDSCENE_MCP_CHROME_PATH = "MIDSCENE_MCP_CHROME_PATH";
 export declare const MIDSCENE_MCP_ANDROID_MODE = "MIDSCENE_MCP_ANDROID_MODE";
 export declare const DOCKER_CONTAINER = "DOCKER_CONTAINER";
-export declare const MIDSCENE_FORCE_DEEP_THINK = "MIDSCENE_FORCE_DEEP_THINK";
 export declare const MIDSCENE_LANGSMITH_DEBUG = "MIDSCENE_LANGSMITH_DEBUG";
 export declare const MIDSCENE_LANGFUSE_DEBUG = "MIDSCENE_LANGFUSE_DEBUG";
 export declare const MIDSCENE_MODEL_SOCKS_PROXY = "MIDSCENE_MODEL_SOCKS_PROXY";
@@ -105,7 +104,7 @@ export declare const UNUSED_ENV_KEYS: string[];
  * can not be override by overrideAIConfig
  */
 export declare const BASIC_ENV_KEYS: readonly ["MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR"];
-export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
+export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
 export declare const NUMBER_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT"];
 export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
 /**
@@ -113,14 +112,14 @@ export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "OP
  * Can not be override by agent.modelConfig but can be override by overrideAIConfig
  * Can be access at any time
  */
-export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
+export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
 /**
  * Model related eve keys, used for declare which model to use.
  * Can be override by both agent.modelConfig and overrideAIConfig
  * Can only be access after agent.constructor
  */
 export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
-export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_FORCE_DEEP_THINK", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", 
"MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
+export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", 
"MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
 export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
 export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
 export type TVlModeValues = 'qwen2.5-vl' | 'qwen3-vl' | 'qwen3.5' | 'doubao-vision' | 'doubao-seed' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5' | 'glm-v' | 'auto-glm' | 'auto-glm-multilingual';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@midscene/shared",
-  "version": "1.5.1-beta-20260302102736.0",
+  "version": "1.5.1",
   "repository": "https://github.com/web-infra-dev/midscene",
   "homepage": "https://midscenejs.com/",
   "types": "./dist/types/index.d.ts",

package/src/constants/example-code.ts CHANGED Viewed

@@ -6,16 +6,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
 // Type signatures for AI functions:
 aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
-aiInput(text: string, locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiTap(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiHover(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiDoubleClick(locate: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
-aiKeyboardPress(key: string, locate?: string, options?: { deepThink?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
+aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
 aiScroll(locate: string | undefined, options: {
   direction?: 'up' | 'down' | 'left' | 'right',
   scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
   distance?: number | null,
-  deepThink?: boolean,
+  deepLocate?: boolean,
   xpath?: string,
   cacheable?: boolean
 }): Promise<void>
@@ -110,7 +110,7 @@ tasks:
 3. Best Practices:
 - Group related actions into logical tasks
 - Use natural language descriptions
-- Add deepThink: true for complex interactions
+- Add deepLocate: true for complex interactions
 - Keep task names concise but descriptive
 4. CRITICAL - YAML Indentation Rules:
@@ -148,33 +148,33 @@ tasks:
       # Tap an element described by a prompt.
       - aiTap: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Double click an element described by a prompt.
       - aiDoubleClick: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Hover over an element described by a prompt.
       - aiHover: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Input text into an element described by a prompt.
       - aiInput: <final text content of the input>
         locate: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
       # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
       - aiKeyboardPress: <key>
         locate: <prompt>
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
@@ -184,7 +184,7 @@ tasks:
         scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
         distance: <number> # Optional, the scroll distance in pixels.
         locate: <prompt> # Optional, the element to scroll on.
-        deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
+        deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
         xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
         cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.

package/src/env/types.ts CHANGED Viewed

@@ -12,7 +12,6 @@ export const MIDSCENE_MCP_USE_PUPPETEER_MODE =
 export const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
 export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
 export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
-export const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
 // Observability
 export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
@@ -164,7 +163,6 @@ export const BASIC_ENV_KEYS = [
 export const BOOLEAN_ENV_KEYS = [
   MIDSCENE_CACHE,
-  MIDSCENE_FORCE_DEEP_THINK,
   MIDSCENE_MCP_USE_PUPPETEER_MODE,
   MIDSCENE_MCP_ANDROID_MODE,
   MIDSCENE_LANGSMITH_DEBUG,