@midscene/shared 1.5.1-beta-20260302102736.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/constants/example-code.mjs +13 -13
- package/dist/es/env/types.mjs +1 -3
- package/dist/lib/constants/example-code.js +13 -13
- package/dist/lib/env/types.js +0 -5
- package/dist/types/constants/example-code.d.ts +2 -2
- package/dist/types/env/types.d.ts +3 -4
- package/package.json +1 -1
- package/src/constants/example-code.ts +13 -13
- package/src/env/types.ts +0 -2
package/dist/es/constants/example-code.mjs
CHANGED
|
@@ -6,16 +6,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
|
|
|
6
6
|
|
|
7
7
|
// Type signatures for AI functions:
|
|
8
8
|
aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
|
|
9
|
-
aiInput(text: string, locate: string, options?: {
|
|
10
|
-
aiTap(locate: string, options?: {
|
|
11
|
-
aiHover(locate: string, options?: {
|
|
12
|
-
aiDoubleClick(locate: string, options?: {
|
|
13
|
-
aiKeyboardPress(key: string, locate?: string, options?: {
|
|
9
|
+
aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
10
|
+
aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
11
|
+
aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
12
|
+
aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
13
|
+
aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
14
14
|
aiScroll(locate: string | undefined, options: {
|
|
15
15
|
direction?: 'up' | 'down' | 'left' | 'right',
|
|
16
16
|
scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
|
|
17
17
|
distance?: number | null,
|
|
18
|
-
|
|
18
|
+
deepLocate?: boolean,
|
|
19
19
|
xpath?: string,
|
|
20
20
|
cacheable?: boolean
|
|
21
21
|
}): Promise<void>
|
|
@@ -109,7 +109,7 @@ tasks:
|
|
|
109
109
|
3. Best Practices:
|
|
110
110
|
- Group related actions into logical tasks
|
|
111
111
|
- Use natural language descriptions
|
|
112
|
-
- Add
|
|
112
|
+
- Add deepLocate: true for complex interactions
|
|
113
113
|
- Keep task names concise but descriptive
|
|
114
114
|
|
|
115
115
|
4. CRITICAL - YAML Indentation Rules:
|
|
@@ -147,33 +147,33 @@ tasks:
|
|
|
147
147
|
|
|
148
148
|
# Tap an element described by a prompt.
|
|
149
149
|
- aiTap: <prompt>
|
|
150
|
-
|
|
150
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
151
151
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
152
152
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
153
153
|
|
|
154
154
|
# Double click an element described by a prompt.
|
|
155
155
|
- aiDoubleClick: <prompt>
|
|
156
|
-
|
|
156
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
157
157
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
158
158
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
159
159
|
|
|
160
160
|
# Hover over an element described by a prompt.
|
|
161
161
|
- aiHover: <prompt>
|
|
162
|
-
|
|
162
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
163
163
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
164
164
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
165
165
|
|
|
166
166
|
# Input text into an element described by a prompt.
|
|
167
167
|
- aiInput: <final text content of the input>
|
|
168
168
|
locate: <prompt>
|
|
169
|
-
|
|
169
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
170
170
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
171
171
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
172
172
|
|
|
173
173
|
# Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
|
|
174
174
|
- aiKeyboardPress: <key>
|
|
175
175
|
locate: <prompt>
|
|
176
|
-
|
|
176
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
177
177
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
178
178
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
179
179
|
|
|
@@ -183,7 +183,7 @@ tasks:
|
|
|
183
183
|
scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
|
|
184
184
|
distance: <number> # Optional, the scroll distance in pixels.
|
|
185
185
|
locate: <prompt> # Optional, the element to scroll on.
|
|
186
|
-
|
|
186
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
187
187
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
188
188
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
189
189
|
|
package/dist/es/env/types.mjs
CHANGED
|
@@ -8,7 +8,6 @@ const MIDSCENE_MCP_USE_PUPPETEER_MODE = 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
|
|
|
8
8
|
const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
|
|
9
9
|
const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
|
|
10
10
|
const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
|
|
11
|
-
const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
|
|
12
11
|
const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
|
|
13
12
|
const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
|
|
14
13
|
const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
|
|
@@ -89,7 +88,6 @@ const BASIC_ENV_KEYS = [
|
|
|
89
88
|
];
|
|
90
89
|
const BOOLEAN_ENV_KEYS = [
|
|
91
90
|
MIDSCENE_CACHE,
|
|
92
|
-
MIDSCENE_FORCE_DEEP_THINK,
|
|
93
91
|
MIDSCENE_MCP_USE_PUPPETEER_MODE,
|
|
94
92
|
MIDSCENE_MCP_ANDROID_MODE,
|
|
95
93
|
MIDSCENE_LANGSMITH_DEBUG,
|
|
@@ -207,4 +205,4 @@ var types_UITarsModelVersion = /*#__PURE__*/ function(UITarsModelVersion) {
|
|
|
207
205
|
UITarsModelVersion["DOUBAO_1_5_20B"] = "doubao-1.5-20B";
|
|
208
206
|
return UITarsModelVersion;
|
|
209
207
|
}({});
|
|
210
|
-
export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE,
|
|
208
|
+
export { ALL_ENV_KEYS, BASIC_ENV_KEYS, BOOLEAN_ENV_KEYS, DOCKER_CONTAINER, GLOBAL_ENV_KEYS, MATCH_BY_POSITION, MIDSCENE_ADB_PATH, MIDSCENE_ADB_REMOTE_HOST, MIDSCENE_ADB_REMOTE_PORT, MIDSCENE_ANDROID_IME_STRATEGY, MIDSCENE_CACHE, MIDSCENE_CACHE_MAX_FILENAME_LENGTH, MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG, MIDSCENE_DEBUG_MODE, MIDSCENE_DEBUG_MODEL_PROFILE, MIDSCENE_DEBUG_MODEL_RESPONSE, MIDSCENE_INSIGHT_MODEL_API_KEY, MIDSCENE_INSIGHT_MODEL_BASE_URL, MIDSCENE_INSIGHT_MODEL_FAMILY, MIDSCENE_INSIGHT_MODEL_HTTP_PROXY, MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON, MIDSCENE_INSIGHT_MODEL_NAME, MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET, MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT, MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED, MIDSCENE_INSIGHT_MODEL_RETRY_COUNT, MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL, MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY, MIDSCENE_INSIGHT_MODEL_TEMPERATURE, MIDSCENE_INSIGHT_MODEL_TIMEOUT, MIDSCENE_IOS_DEVICE_UDID, MIDSCENE_IOS_SIMULATOR_UDID, MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MCP_ANDROID_MODE, MIDSCENE_MCP_CHROME_PATH, MIDSCENE_MCP_USE_PUPPETEER_MODE, MIDSCENE_MODEL_API_KEY, MIDSCENE_MODEL_BASE_URL, MIDSCENE_MODEL_FAMILY, MIDSCENE_MODEL_HTTP_PROXY, MIDSCENE_MODEL_INIT_CONFIG_JSON, MIDSCENE_MODEL_MAX_TOKENS, MIDSCENE_MODEL_NAME, MIDSCENE_MODEL_REASONING_BUDGET, MIDSCENE_MODEL_REASONING_EFFORT, MIDSCENE_MODEL_REASONING_ENABLED, MIDSCENE_MODEL_RETRY_COUNT, MIDSCENE_MODEL_RETRY_INTERVAL, MIDSCENE_MODEL_SOCKS_PROXY, MIDSCENE_MODEL_TEMPERATURE, MIDSCENE_MODEL_TIMEOUT, MIDSCENE_OPENAI_HTTP_PROXY, MIDSCENE_OPENAI_INIT_CONFIG_JSON, MIDSCENE_OPENAI_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_API_KEY, MIDSCENE_PLANNING_MODEL_BASE_URL, MIDSCENE_PLANNING_MODEL_FAMILY, MIDSCENE_PLANNING_MODEL_HTTP_PROXY, MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON, MIDSCENE_PLANNING_MODEL_NAME, MIDSCENE_PLANNING_MODEL_REASONING_BUDGET, MIDSCENE_PLANNING_MODEL_REASONING_EFFORT, MIDSCENE_PLANNING_MODEL_REASONING_ENABLED, MIDSCENE_PLANNING_MODEL_RETRY_COUNT, 
MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL, MIDSCENE_PLANNING_MODEL_SOCKS_PROXY, MIDSCENE_PLANNING_MODEL_TEMPERATURE, MIDSCENE_PLANNING_MODEL_TIMEOUT, MIDSCENE_PREFERRED_LANGUAGE, MIDSCENE_REPLANNING_CYCLE_LIMIT, MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, MIDSCENE_RUN_DIR, MIDSCENE_USE_DOUBAO_VISION, MIDSCENE_USE_GEMINI, MIDSCENE_USE_QWEN3_VL, MIDSCENE_USE_QWEN_VL, MIDSCENE_USE_VLM_UI_TARS, MIDSCENE_USE_VL_MODEL, MODEL_ENV_KEYS, MODEL_FAMILY_VALUES, NUMBER_ENV_KEYS, OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MAX_TOKENS, STRING_ENV_KEYS, types_UITarsModelVersion as UITarsModelVersion, UNUSED_ENV_KEYS, VL_MODE_RAW_VALID_VALUES };
|
package/dist/lib/constants/example-code.js
CHANGED
|
@@ -35,16 +35,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
|
|
|
35
35
|
|
|
36
36
|
// Type signatures for AI functions:
|
|
37
37
|
aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
|
|
38
|
-
aiInput(text: string, locate: string, options?: {
|
|
39
|
-
aiTap(locate: string, options?: {
|
|
40
|
-
aiHover(locate: string, options?: {
|
|
41
|
-
aiDoubleClick(locate: string, options?: {
|
|
42
|
-
aiKeyboardPress(key: string, locate?: string, options?: {
|
|
38
|
+
aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
39
|
+
aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
40
|
+
aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
41
|
+
aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
42
|
+
aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
43
43
|
aiScroll(locate: string | undefined, options: {
|
|
44
44
|
direction?: 'up' | 'down' | 'left' | 'right',
|
|
45
45
|
scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
|
|
46
46
|
distance?: number | null,
|
|
47
|
-
|
|
47
|
+
deepLocate?: boolean,
|
|
48
48
|
xpath?: string,
|
|
49
49
|
cacheable?: boolean
|
|
50
50
|
}): Promise<void>
|
|
@@ -138,7 +138,7 @@ tasks:
|
|
|
138
138
|
3. Best Practices:
|
|
139
139
|
- Group related actions into logical tasks
|
|
140
140
|
- Use natural language descriptions
|
|
141
|
-
- Add
|
|
141
|
+
- Add deepLocate: true for complex interactions
|
|
142
142
|
- Keep task names concise but descriptive
|
|
143
143
|
|
|
144
144
|
4. CRITICAL - YAML Indentation Rules:
|
|
@@ -176,33 +176,33 @@ tasks:
|
|
|
176
176
|
|
|
177
177
|
# Tap an element described by a prompt.
|
|
178
178
|
- aiTap: <prompt>
|
|
179
|
-
|
|
179
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
180
180
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
181
181
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
182
182
|
|
|
183
183
|
# Double click an element described by a prompt.
|
|
184
184
|
- aiDoubleClick: <prompt>
|
|
185
|
-
|
|
185
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
186
186
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
187
187
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
188
188
|
|
|
189
189
|
# Hover over an element described by a prompt.
|
|
190
190
|
- aiHover: <prompt>
|
|
191
|
-
|
|
191
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
192
192
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
193
193
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
194
194
|
|
|
195
195
|
# Input text into an element described by a prompt.
|
|
196
196
|
- aiInput: <final text content of the input>
|
|
197
197
|
locate: <prompt>
|
|
198
|
-
|
|
198
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
199
199
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
200
200
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
201
201
|
|
|
202
202
|
# Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
|
|
203
203
|
- aiKeyboardPress: <key>
|
|
204
204
|
locate: <prompt>
|
|
205
|
-
|
|
205
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
206
206
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
207
207
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
208
208
|
|
|
@@ -212,7 +212,7 @@ tasks:
|
|
|
212
212
|
scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
|
|
213
213
|
distance: <number> # Optional, the scroll distance in pixels.
|
|
214
214
|
locate: <prompt> # Optional, the element to scroll on.
|
|
215
|
-
|
|
215
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
216
216
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
217
217
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
218
218
|
|
package/dist/lib/env/types.js
CHANGED
|
@@ -40,7 +40,6 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
40
40
|
MIDSCENE_DEBUG_MODE: ()=>MIDSCENE_DEBUG_MODE,
|
|
41
41
|
MIDSCENE_DEBUG_MODEL_PROFILE: ()=>MIDSCENE_DEBUG_MODEL_PROFILE,
|
|
42
42
|
MIDSCENE_DEBUG_MODEL_RESPONSE: ()=>MIDSCENE_DEBUG_MODEL_RESPONSE,
|
|
43
|
-
MIDSCENE_FORCE_DEEP_THINK: ()=>MIDSCENE_FORCE_DEEP_THINK,
|
|
44
43
|
MIDSCENE_INSIGHT_MODEL_API_KEY: ()=>MIDSCENE_INSIGHT_MODEL_API_KEY,
|
|
45
44
|
MIDSCENE_INSIGHT_MODEL_BASE_URL: ()=>MIDSCENE_INSIGHT_MODEL_BASE_URL,
|
|
46
45
|
MIDSCENE_INSIGHT_MODEL_FAMILY: ()=>MIDSCENE_INSIGHT_MODEL_FAMILY,
|
|
@@ -126,7 +125,6 @@ const MIDSCENE_MCP_USE_PUPPETEER_MODE = 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
|
|
|
126
125
|
const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
|
|
127
126
|
const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
|
|
128
127
|
const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
|
|
129
|
-
const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
|
|
130
128
|
const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
|
|
131
129
|
const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
|
|
132
130
|
const MIDSCENE_MODEL_SOCKS_PROXY = 'MIDSCENE_MODEL_SOCKS_PROXY';
|
|
@@ -207,7 +205,6 @@ const BASIC_ENV_KEYS = [
|
|
|
207
205
|
];
|
|
208
206
|
const BOOLEAN_ENV_KEYS = [
|
|
209
207
|
MIDSCENE_CACHE,
|
|
210
|
-
MIDSCENE_FORCE_DEEP_THINK,
|
|
211
208
|
MIDSCENE_MCP_USE_PUPPETEER_MODE,
|
|
212
209
|
MIDSCENE_MCP_ANDROID_MODE,
|
|
213
210
|
MIDSCENE_LANGSMITH_DEBUG,
|
|
@@ -341,7 +338,6 @@ exports.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = __webpack_exports__.MIDSCENE_DAN
|
|
|
341
338
|
exports.MIDSCENE_DEBUG_MODE = __webpack_exports__.MIDSCENE_DEBUG_MODE;
|
|
342
339
|
exports.MIDSCENE_DEBUG_MODEL_PROFILE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_PROFILE;
|
|
343
340
|
exports.MIDSCENE_DEBUG_MODEL_RESPONSE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_RESPONSE;
|
|
344
|
-
exports.MIDSCENE_FORCE_DEEP_THINK = __webpack_exports__.MIDSCENE_FORCE_DEEP_THINK;
|
|
345
341
|
exports.MIDSCENE_INSIGHT_MODEL_API_KEY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_API_KEY;
|
|
346
342
|
exports.MIDSCENE_INSIGHT_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_BASE_URL;
|
|
347
343
|
exports.MIDSCENE_INSIGHT_MODEL_FAMILY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_FAMILY;
|
|
@@ -433,7 +429,6 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
|
433
429
|
"MIDSCENE_DEBUG_MODE",
|
|
434
430
|
"MIDSCENE_DEBUG_MODEL_PROFILE",
|
|
435
431
|
"MIDSCENE_DEBUG_MODEL_RESPONSE",
|
|
436
|
-
"MIDSCENE_FORCE_DEEP_THINK",
|
|
437
432
|
"MIDSCENE_INSIGHT_MODEL_API_KEY",
|
|
438
433
|
"MIDSCENE_INSIGHT_MODEL_BASE_URL",
|
|
439
434
|
"MIDSCENE_INSIGHT_MODEL_FAMILY",
|
package/dist/types/constants/example-code.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>\naiInput(text: string, locate: string, options?: {
|
|
2
|
-
export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType: scrollToBottom/scrollToTop/singleAction\n - aiAssert: \"expected state\"\n - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add
|
|
1
|
+
export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>\naiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n direction?: 'up' | 'down' | 'left' | 'right',\n scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n distance?: number | null,\n deepLocate?: boolean,\n xpath?: string,\n cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from 
'@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('https://www.xxx.com/');\n await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n aiAct,\n aiInput,\n aiAssert,\n aiQuery,\n aiKeyboardPress,\n aiHover,\n aiTap,\n aiWaitFor,\n agentForPage,\n page,\n}) => {\n // login\n await aiAssert('The page shows the login interface');\n await aiInput('user_name', 'in user name input');\n await aiInput('password', 'in password input');\n await aiKeyboardPress('Enter', 'Login Button');\n\n // check the login success\n await aiWaitFor('The page shows that the loading is complete');\n await aiAssert('The current page shows the product detail page');\n\n // check the product info\n const dataA = await aiQuery({\n userInfo: 'User information in the format {name: string}',\n theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n });\n expect(dataA.theFirstProductInfo.name).toBe('xxx');\n expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n // add to cart\n await aiTap('click add to cart button');\n \n await aiTap('click right top cart icon');\n await aiAssert('The cart icon shows the number 1');\n});\n";
|
|
2
|
+
export declare const YAML_EXAMPLE_CODE = "\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n - aiInput: 'text value'\n locate: 'input field description'\n - aiScroll:\n direction: down/up\n scrollType: scrollToBottom/scrollToTop/singleAction\n - aiAssert: \"expected state\"\n - sleep: milliseconds\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n3. Best Practices:\n- Group related actions into logical tasks\n- Use natural language descriptions\n- Add deepLocate: true for complex interactions\n- Keep task names concise but descriptive\n\n4. CRITICAL - YAML Indentation Rules:\n- For actions with additional parameters (aiScroll, aiInput, aiKeyboardPress), the parameters must be SIBLING keys, NOT nested children\n- Parameters like direction, scrollType, locate must align with the action key, not indented further\n- CORRECT indentation example:\n - aiScroll:\n direction: down\n scrollType: singleAction\n- WRONG indentation (DO NOT do this):\n - aiScroll:\n direction: down\n scrollType: singleAction\n\n\n\nYAML type\ntasks:\n - name: <name>\n continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n flow:\n # Auto Planning (.ai)\n # ----------------\n\n # Perform an interaction. `ai` is a shorthand for `aiAct`.\n - ai: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # This usage is the same as `ai`.\n - aiAct: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n # ----------------\n\n # Tap an element described by a prompt.\n - aiTap: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Double click an element described by a prompt.\n - aiDoubleClick: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Hover over an element described by a prompt.\n - aiHover: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Input text into an element described by a prompt.\n - aiInput: <final text content of the input>\n locate: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. 
Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n - aiKeyboardPress: <key>\n locate: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Scroll globally or on an element described by a prompt.\n - aiScroll:\n direction: 'up' # or 'down' | 'left' | 'right'\n scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'\n distance: <number> # Optional, the scroll distance in pixels.\n locate: <prompt> # Optional, the element to scroll on.\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Record the current screenshot with a description in the report file.\n - recordToReport: <title> # Optional, the title of the screenshot. 
If not provided, the title will be 'untitled'.\n content: <content> # Optional, the description of the screenshot.\n\n # Data Extraction\n # ----------------\n\n # Perform a query that returns a JSON object.\n - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n name: <name> # The key for the query result in the JSON output.\n\n # More APIs\n # ----------------\n\n # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n - aiWaitFor: <prompt>\n timeout: <ms>\n\n # Perform an assertion.\n - aiAssert: <prompt>\n errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n # Wait for a specified amount of time.\n - sleep: <ms>\n\n # Execute a piece of JavaScript code in the web page context.\n - javascript: <javascript>\n name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n - name: <name>\n flow:\n # ...\n";
|
package/dist/types/env/types.d.ts
CHANGED
|
@@ -8,7 +8,6 @@ export declare const MIDSCENE_MCP_USE_PUPPETEER_MODE = "MIDSCENE_MCP_USE_PUPPETE
|
|
|
8
8
|
export declare const MIDSCENE_MCP_CHROME_PATH = "MIDSCENE_MCP_CHROME_PATH";
|
|
9
9
|
export declare const MIDSCENE_MCP_ANDROID_MODE = "MIDSCENE_MCP_ANDROID_MODE";
|
|
10
10
|
export declare const DOCKER_CONTAINER = "DOCKER_CONTAINER";
|
|
11
|
-
export declare const MIDSCENE_FORCE_DEEP_THINK = "MIDSCENE_FORCE_DEEP_THINK";
|
|
12
11
|
export declare const MIDSCENE_LANGSMITH_DEBUG = "MIDSCENE_LANGSMITH_DEBUG";
|
|
13
12
|
export declare const MIDSCENE_LANGFUSE_DEBUG = "MIDSCENE_LANGFUSE_DEBUG";
|
|
14
13
|
export declare const MIDSCENE_MODEL_SOCKS_PROXY = "MIDSCENE_MODEL_SOCKS_PROXY";
|
|
@@ -105,7 +104,7 @@ export declare const UNUSED_ENV_KEYS: string[];
|
|
|
105
104
|
* cannot be overridden by overrideAIConfig
|
|
106
105
|
*/
|
|
107
106
|
export declare const BASIC_ENV_KEYS: readonly ["MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR"];
|
|
108
|
-
export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "
|
|
107
|
+
export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
|
|
109
108
|
export declare const NUMBER_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT"];
|
|
110
109
|
export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
|
|
111
110
|
/**
|
|
@@ -113,14 +112,14 @@ export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "OP
|
|
|
113
112
|
* Cannot be overridden by agent.modelConfig but can be overridden by overrideAIConfig
|
|
114
113
|
* Can be accessed at any time
|
|
115
114
|
*/
|
|
116
|
-
export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "
|
|
115
|
+
export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
|
|
117
116
|
/**
|
|
118
117
|
* Model-related env keys, used to declare which model to use.
|
|
119
118
|
* Can be overridden by both agent.modelConfig and overrideAIConfig
|
|
120
119
|
* Can only be accessed after agent.constructor
|
|
121
120
|
*/
|
|
122
121
|
export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
|
|
123
|
-
export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "
|
|
122
|
+
export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", 
"MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
|
|
124
123
|
export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
|
|
125
124
|
export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
|
|
126
125
|
export type TVlModeValues = 'qwen2.5-vl' | 'qwen3-vl' | 'qwen3.5' | 'doubao-vision' | 'doubao-seed' | 'gemini' | 'vlm-ui-tars' | 'vlm-ui-tars-doubao' | 'vlm-ui-tars-doubao-1.5' | 'glm-v' | 'auto-glm' | 'auto-glm-multilingual';
|
package/package.json
CHANGED
|
@@ -6,16 +6,16 @@ IMPORTANT: Follow these exact type signatures for AI functions:
|
|
|
6
6
|
|
|
7
7
|
// Type signatures for AI functions:
|
|
8
8
|
aiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>
|
|
9
|
-
aiInput(text: string, locate: string, options?: {
|
|
10
|
-
aiTap(locate: string, options?: {
|
|
11
|
-
aiHover(locate: string, options?: {
|
|
12
|
-
aiDoubleClick(locate: string, options?: {
|
|
13
|
-
aiKeyboardPress(key: string, locate?: string, options?: {
|
|
9
|
+
aiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
10
|
+
aiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
11
|
+
aiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
12
|
+
aiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
13
|
+
aiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>
|
|
14
14
|
aiScroll(locate: string | undefined, options: {
|
|
15
15
|
direction?: 'up' | 'down' | 'left' | 'right',
|
|
16
16
|
scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',
|
|
17
17
|
distance?: number | null,
|
|
18
|
-
|
|
18
|
+
deepLocate?: boolean,
|
|
19
19
|
xpath?: string,
|
|
20
20
|
cacheable?: boolean
|
|
21
21
|
}): Promise<void>
|
|
@@ -110,7 +110,7 @@ tasks:
|
|
|
110
110
|
3. Best Practices:
|
|
111
111
|
- Group related actions into logical tasks
|
|
112
112
|
- Use natural language descriptions
|
|
113
|
-
- Add
|
|
113
|
+
- Add deepLocate: true for complex interactions
|
|
114
114
|
- Keep task names concise but descriptive
|
|
115
115
|
|
|
116
116
|
4. CRITICAL - YAML Indentation Rules:
|
|
@@ -148,33 +148,33 @@ tasks:
|
|
|
148
148
|
|
|
149
149
|
# Tap an element described by a prompt.
|
|
150
150
|
- aiTap: <prompt>
|
|
151
|
-
|
|
151
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
152
152
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
153
153
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
154
154
|
|
|
155
155
|
# Double click an element described by a prompt.
|
|
156
156
|
- aiDoubleClick: <prompt>
|
|
157
|
-
|
|
157
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
158
158
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
159
159
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
160
160
|
|
|
161
161
|
# Hover over an element described by a prompt.
|
|
162
162
|
- aiHover: <prompt>
|
|
163
|
-
|
|
163
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
164
164
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
165
165
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
166
166
|
|
|
167
167
|
# Input text into an element described by a prompt.
|
|
168
168
|
- aiInput: <final text content of the input>
|
|
169
169
|
locate: <prompt>
|
|
170
|
-
|
|
170
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
171
171
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
172
172
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
173
173
|
|
|
174
174
|
# Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
|
|
175
175
|
- aiKeyboardPress: <key>
|
|
176
176
|
locate: <prompt>
|
|
177
|
-
|
|
177
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
178
178
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
179
179
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
180
180
|
|
|
@@ -184,7 +184,7 @@ tasks:
|
|
|
184
184
|
scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'
|
|
185
185
|
distance: <number> # Optional, the scroll distance in pixels.
|
|
186
186
|
locate: <prompt> # Optional, the element to scroll on.
|
|
187
|
-
|
|
187
|
+
deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.
|
|
188
188
|
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
|
|
189
189
|
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
|
|
190
190
|
|
package/src/env/types.ts
CHANGED
|
@@ -12,7 +12,6 @@ export const MIDSCENE_MCP_USE_PUPPETEER_MODE =
|
|
|
12
12
|
export const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
|
|
13
13
|
export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
|
|
14
14
|
export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
|
|
15
|
-
export const MIDSCENE_FORCE_DEEP_THINK = 'MIDSCENE_FORCE_DEEP_THINK';
|
|
16
15
|
|
|
17
16
|
// Observability
|
|
18
17
|
export const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
|
|
@@ -164,7 +163,6 @@ export const BASIC_ENV_KEYS = [
|
|
|
164
163
|
|
|
165
164
|
export const BOOLEAN_ENV_KEYS = [
|
|
166
165
|
MIDSCENE_CACHE,
|
|
167
|
-
MIDSCENE_FORCE_DEEP_THINK,
|
|
168
166
|
MIDSCENE_MCP_USE_PUPPETEER_MODE,
|
|
169
167
|
MIDSCENE_MCP_ANDROID_MODE,
|
|
170
168
|
MIDSCENE_LANGSMITH_DEBUG,
|