@midscene/mcp 0.19.1 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/API.mdx +46 -9
- package/dist/index.cjs +463 -383
- package/dist/index.js +463 -383
- package/package.json +4 -4
package/dist/API.mdx
CHANGED
|
@@ -108,7 +108,8 @@ function aiTap(locate: string, options?: Object): Promise<void>;
|
|
|
108
108
|
|
|
109
109
|
- `locate: string` - A natural language description of the element to tap.
|
|
110
110
|
- `options?: Object` - Optional, a configuration object containing:
|
|
111
|
-
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
|
|
111
|
+
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
|
|
112
|
+
- `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
|
|
112
113
|
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
|
|
113
114
|
|
|
114
115
|
- Return Value:
|
|
@@ -140,7 +141,8 @@ function aiHover(locate: string, options?: Object): Promise<void>;
|
|
|
140
141
|
|
|
141
142
|
- `locate: string` - A natural language description of the element to hover over.
|
|
142
143
|
- `options?: Object` - Optional, a configuration object containing:
|
|
143
|
-
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
|
|
144
|
+
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
|
|
145
|
+
- `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
|
|
144
146
|
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
|
|
145
147
|
|
|
146
148
|
- Return Value:
|
|
@@ -168,7 +170,8 @@ function aiInput(text: string, locate: string, options?: Object): Promise<void>;
|
|
|
168
170
|
- `text: string` - The final text content that should be placed in the input element. Use blank string to clear the input.
|
|
169
171
|
- `locate: string` - A natural language description of the element to input text into.
|
|
170
172
|
- `options?: Object` - Optional, a configuration object containing:
|
|
171
|
-
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
|
|
173
|
+
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
|
|
174
|
+
- `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
|
|
172
175
|
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
|
|
173
176
|
- `autoDismissKeyboard?: boolean` - If true, the keyboard will be dismissed after the text is entered. Only available on Android. True by default.
|
|
174
177
|
|
|
@@ -201,7 +204,8 @@ function aiKeyboardPress(
|
|
|
201
204
|
- `key: string` - The web key to press, e.g. 'Enter', 'Tab', 'Escape', etc. Key Combination is not supported.
|
|
202
205
|
- `locate?: string` - Optional, a natural language description of the element to press the key on.
|
|
203
206
|
- `options?: Object` - Optional, a configuration object containing:
|
|
204
|
-
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
|
|
207
|
+
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
|
|
208
|
+
- `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
|
|
205
209
|
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
|
|
206
210
|
|
|
207
211
|
- Return Value:
|
|
@@ -236,7 +240,8 @@ function aiScroll(
|
|
|
236
240
|
- `distance: number` - Optional, the distance to scroll in px.
|
|
237
241
|
- `locate?: string` - Optional, a natural language description of the element to scroll on. If not provided, Midscene will perform scroll on the current mouse position.
|
|
238
242
|
- `options?: Object` - Optional, a configuration object containing:
|
|
239
|
-
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
|
|
243
|
+
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
|
|
244
|
+
- `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
|
|
240
245
|
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
|
|
241
246
|
|
|
242
247
|
- Return Value:
|
|
@@ -266,7 +271,8 @@ function aiRightClick(locate: string, options?: Object): Promise<void>;
|
|
|
266
271
|
|
|
267
272
|
- `locate: string` - A natural language description of the element to right-click on.
|
|
268
273
|
- `options?: Object` - Optional, a configuration object containing:
|
|
269
|
-
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
|
|
274
|
+
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
|
|
275
|
+
- `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
|
|
270
276
|
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
|
|
271
277
|
|
|
272
278
|
- Return Value:
|
|
@@ -286,15 +292,45 @@ await agent.aiRightClick('The file name at the top of the page', {
|
|
|
286
292
|
|
|
287
293
|
:::tip About the `deepThink` feature
|
|
288
294
|
|
|
289
|
-
The `deepThink` feature is a powerful feature that allows Midscene to call AI model twice to precisely locate the element. It is useful when the AI model find it hard to distinguish the element from its surroundings.
|
|
295
|
+
The `deepThink` feature is a powerful feature that allows Midscene to call the AI model twice to precisely locate the element. It is false by default. It is useful when the AI model finds it hard to distinguish the element from its surroundings.
|
|
290
296
|
|
|
291
297
|
:::
|
|
292
298
|
|
|
293
299
|
## Data Extraction
|
|
294
300
|
|
|
301
|
+
### `agent.aiAsk()`
|
|
302
|
+
|
|
303
|
+
Ask the AI model any question about the current page. It returns the AI model's answer as a string.
|
|
304
|
+
|
|
305
|
+
- Type
|
|
306
|
+
|
|
307
|
+
```typescript
|
|
308
|
+
function aiAsk(prompt: string, options?: Object): Promise<string>;
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
- Parameters:
|
|
312
|
+
|
|
313
|
+
- `prompt: string` - A natural language description of the question.
|
|
314
|
+
- `options?: Object` - Optional, a configuration object containing:
|
|
315
|
+
- `domIncluded?: boolean | 'visible-only'` - Whether to send simplified DOM information to the model, usually used for extracting invisible attributes like image links. If set to `'visible-only'`, only the visible elements will be sent. Default: False.
|
|
316
|
+
- `screenshotIncluded?: boolean` - Whether to send screenshot to the model. Default: True.
|
|
317
|
+
|
|
318
|
+
- Return Value:
|
|
319
|
+
|
|
320
|
+
- Return a Promise. Return the answer from the AI model.
|
|
321
|
+
|
|
322
|
+
- Examples:
|
|
323
|
+
|
|
324
|
+
```typescript
|
|
325
|
+
const result = await agent.aiAsk('What should I do to test this page?');
|
|
326
|
+
console.log(result); // Output the answer from the AI model
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Besides `aiAsk`, you can also use `aiQuery` to extract structured data from the UI.
|
|
330
|
+
|
|
295
331
|
### `agent.aiQuery()`
|
|
296
332
|
|
|
297
|
-
This method allows you to extract data
|
|
333
|
+
This method allows you to extract structured data from the current page. Simply define the expected format (e.g., string, number, JSON, or an array) in the `dataDemand`, and Midscene will return a result that matches the format.
|
|
298
334
|
|
|
299
335
|
- Type
|
|
300
336
|
|
|
@@ -501,7 +537,8 @@ function aiLocate(
|
|
|
501
537
|
|
|
502
538
|
- `locate: string` - A natural language description of the element to locate.
|
|
503
539
|
- `options?: Object` - Optional, a configuration object containing:
|
|
504
|
-
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element.
|
|
540
|
+
- `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element. False by default.
|
|
541
|
+
- `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
|
|
505
542
|
- `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
|
|
506
543
|
|
|
507
544
|
- Return Value:
|