mobai-mcp 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.js +32 -1
- package/dist/resources.js +38 -6
- package/package.json +1 -1
- package/server.json +2 -2
package/README.md
CHANGED
|
@@ -140,7 +140,7 @@ Pass this as the `commands` argument (a JSON string) to `execute_dsl` along with
|
|
|
140
140
|
|
|
141
141
|
## Troubleshooting
|
|
142
142
|
|
|
143
|
-
**"Connection refused"** — Make sure the MobAI desktop app is running and the API is reachable at `http://127.0.0.1:8686`.
|
|
143
|
+
**"Connection refused" / "Could not reach the MobAI desktop app"** — Make sure the MobAI desktop app is installed and running, and the API is reachable at `http://127.0.0.1:8686`. If you don't have it yet, download and install it from [https://mobai.run/download](https://mobai.run/download).
|
|
144
144
|
|
|
145
145
|
**"Bridge not running"** — Call `start_bridge` first. The iOS bridge can take up to a minute to come up.
|
|
146
146
|
|
package/dist/index.js
CHANGED
|
@@ -15,6 +15,28 @@ import { RESOURCES, getResourceContent } from "./resources.js";
|
|
|
15
15
|
const API_BASE_URL = "http://127.0.0.1:8686/api/v1";
|
|
16
16
|
const DEFAULT_TIMEOUT_MS = 300000; // 5 minutes (matches Go httpClient timeout)
|
|
17
17
|
const SCREENSHOT_DIR = path.join(os.tmpdir(), "mobai", "screenshots");
|
|
18
|
+
const DOWNLOAD_URL = "https://mobai.run/download";
|
|
19
|
+
// Message shown when the MobAI desktop app is not reachable at its local API.
|
|
20
|
+
const APP_NOT_RUNNING_MESSAGE = `Could not reach the MobAI desktop app at 127.0.0.1:8686. ` +
|
|
21
|
+
`Make sure the MobAI desktop app is installed and running, then try again. ` +
|
|
22
|
+
`If you don't have it yet, download and install it from ${DOWNLOAD_URL}.`;
|
|
23
|
+
/**
 * Decides whether a thrown value means "nothing answered on the MobAI API
 * port" rather than some other failure.
 *
 * Two signals are accepted:
 *   1. the error's `cause` is an object whose `code` is one of the listed
 *      errno strings, or
 *   2. the error is a TypeError whose message contains "fetch failed"
 *      (case-insensitive).
 *
 * @param {unknown} err - Value caught from a failed request.
 * @returns {boolean} true when the failure matches one of the signals above.
 */
function isConnectionError(err) {
    // Non-Error throwables (strings, plain objects, null) never qualify.
    if (!(err instanceof Error)) {
        return false;
    }

    const unreachableCodes = new Set([
        "ECONNREFUSED",
        "ENOTFOUND",
        "ECONNRESET",
        "EHOSTUNREACH",
        "ETIMEDOUT",
    ]);

    // Only read `code` from an object-typed cause; anything else has no code.
    const { cause } = err;
    let errnoCode;
    if (cause && typeof cause === "object") {
        errnoCode = cause.code;
    }
    if (typeof errnoCode === "string" && unreachableCodes.has(errnoCode)) {
        return true;
    }

    // Fallback: undici reports a bare "fetch failed" TypeError for these.
    if (err.name !== "TypeError") {
        return false;
    }
    return /fetch failed/i.test(err.message);
}
|
|
18
40
|
// ---------------------------------------------------------------------------
|
|
19
41
|
// Screenshot helpers
|
|
20
42
|
// ---------------------------------------------------------------------------
|
|
@@ -55,7 +77,16 @@ async function doRequest(method, urlPath, payload, timeoutMs = DEFAULT_TIMEOUT_M
|
|
|
55
77
|
if (payload !== undefined && ["POST", "PUT", "PATCH"].includes(method)) {
|
|
56
78
|
opts.body = typeof payload === "string" ? payload : JSON.stringify(payload);
|
|
57
79
|
}
|
|
58
|
-
|
|
80
|
+
let response;
|
|
81
|
+
try {
|
|
82
|
+
response = await fetch(url, opts);
|
|
83
|
+
}
|
|
84
|
+
catch (err) {
|
|
85
|
+
if (isConnectionError(err)) {
|
|
86
|
+
throw new Error(APP_NOT_RUNNING_MESSAGE);
|
|
87
|
+
}
|
|
88
|
+
throw err;
|
|
89
|
+
}
|
|
59
90
|
clearTimeout(timeoutId);
|
|
60
91
|
const text = await response.text();
|
|
61
92
|
let body;
|
package/dist/resources.js
CHANGED
|
@@ -123,7 +123,7 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
123
123
|
<screenshot-tools>
|
|
124
124
|
get_screenshot — fast low-quality image for LLM visual analysis.
|
|
125
125
|
save_screenshot — full-quality PNG for reporting, debugging, or sharing.
|
|
126
|
-
|
|
126
|
+
A screenshot is a single settled frame — it cannot capture motion. Anything transient (animations, transitions, loading spinners; a screen transition is often only ~300ms) will be missed or caught mid-frame. To verify transitional behavior, use record_start/record_stop, which samples continuously and flags suspicious frames.
|
|
127
127
|
</screenshot-tools>
|
|
128
128
|
|
|
129
129
|
<infinite-scrolling>To collect data from infinite-scrolling views (feeds, search results), scroll to load a batch first, then observe with only_visible:false to get all loaded items in one go.</infinite-scrolling>
|
|
@@ -132,6 +132,7 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
132
132
|
Element not visible — use scroll with to_element to find it.
|
|
133
133
|
App launches and page transitions take time — use wait_for or delay.
|
|
134
134
|
Observe before acting on unfamiliar screens.
|
|
135
|
+
NO_MATCH / failed assert_exists: if the element exists off-screen, the error lists it under "candidates" — scroll to bring it into view (off-screen elements cannot be tapped). Empty candidates means it is genuinely absent or not yet rendered.
|
|
135
136
|
</troubleshooting>
|
|
136
137
|
</guide>
|
|
137
138
|
|
|
@@ -145,6 +146,8 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
145
146
|
<field name="text_contains" type="string">Substring, case-insensitive — preferred for most matching</field>
|
|
146
147
|
<field name="text_starts_with" type="string">Prefix match</field>
|
|
147
148
|
<field name="text_regex" type="string">Regex pattern — use for dynamic text (numbers, dates, counts)</field>
|
|
149
|
+
<field name="value" type="string">Exact match on the element's entered/current value (not its label/placeholder). Use to verify what was typed into a field — text matching sees the placeholder, value sees the content. Shown as content="..." in the UI tree. Secure fields are masked, so only length/non-empty is meaningful.</field>
|
|
150
|
+
<field name="value_contains" type="string">Substring match (case-insensitive) on the entered/current value</field>
|
|
148
151
|
<field name="type" type="string">button, input, switch, text, image, cell, scrollview</field>
|
|
149
152
|
<field name="accessibility_id" type="string">Exact match on the #id shown in UI tree (without the # prefix)</field>
|
|
150
153
|
<field name="enabled" type="bool">Enabled state</field>
|
|
@@ -173,8 +176,11 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
173
176
|
|
|
174
177
|
<action name="open_app">
|
|
175
178
|
<field name="bundle_id" required="yes"/>
|
|
179
|
+
<field name="fresh" type="bool">Kill the app before launching to ensure a clean start from the home screen. Use when the app may have been left on an arbitrary screen from a previous run.</field>
|
|
180
|
+
<field name="debug" type="bool">ONLY for debug-built apps (e.g. Flutter dev builds, Xcode debug builds) that need a debugger attached to run. Attaches debugserver, streams stdout/stderr to a log file; result has log_path. Do NOT use for release/App Store apps — they launch fine with debug: false.</field>
|
|
176
181
|
<example>{"action": "open_app", "bundle_id": "com.apple.Preferences"}</example>
|
|
177
|
-
<
|
|
182
|
+
<example>{"action": "open_app", "bundle_id": "com.apple.Preferences", "fresh": true}</example>
|
|
183
|
+
<note>If open_app fails or the app disappears immediately after launch, the app has likely crashed. Do NOT retry or try alternative launch methods — start crash investigation instead. Use debug: true (or metrics_start with capture_logs: true) to capture device logs, then diagnose.</note>
|
|
178
184
|
</action>
|
|
179
185
|
|
|
180
186
|
<action name="tap">
|
|
@@ -211,6 +217,12 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
211
217
|
<example>{"action": "type", "text": "Hello", "predicate": {"type": "input"}, "clear_first": true}</example>
|
|
212
218
|
</action>
|
|
213
219
|
|
|
220
|
+
<action name="clear">
|
|
221
|
+
Clear a field's text without typing. With a predicate, focuses that field first; without one, clears the currently focused field.
|
|
222
|
+
<field name="predicate">Optional target element</field>
|
|
223
|
+
<example>{"action": "clear", "predicate": {"type": "input"}}</example>
|
|
224
|
+
</action>
|
|
225
|
+
|
|
214
226
|
<action name="swipe">
|
|
215
227
|
Direction = finger movement. Use direction OR from_coords/to_coords.
|
|
216
228
|
<field name="direction">up, down, left, right</field>
|
|
@@ -243,6 +255,12 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
243
255
|
<example>{"action": "drag", "from": {"predicate": {"text": "App"}}, "to_element": {"predicate": {"text": "Folder"}}, "press_duration_ms": 500, "hold_duration_ms": 200}</example>
|
|
244
256
|
</action>
|
|
245
257
|
|
|
258
|
+
<action name="drag_path">
|
|
259
|
+
<field name="points" type="array of {x, y, duration_ms}" required="true">Single-finger drag along a multi-point path. Each point's duration_ms is the time to move to it from the previous point. The first point is the touch-down location and its duration_ms is an optional initial press-hold (omit or 0 for none). Needs at least 2 points; every point after the first must have duration_ms > 0. Use this (not drag) for swipe-path gestures like unlock patterns, freeform draws, or curved scrolls.</field>
|
|
260
|
+
<example>{"action": "drag_path", "points": [{"x": 100, "y": 400}, {"x": 150, "y": 300, "duration_ms": 150}, {"x": 300, "y": 500, "duration_ms": 300}]}</example>
|
|
261
|
+
<example>{"action": "drag_path", "points": [{"x": 100, "y": 400, "duration_ms": 200}, {"x": 300, "y": 400, "duration_ms": 250}]}</example>
|
|
262
|
+
</action>
|
|
263
|
+
|
|
246
264
|
<action name="press_key">
|
|
247
265
|
<field name="key" required="yes"/>
|
|
248
266
|
<platform name="android">enter, tab, delete, escape, volume_up, volume_down, home, back, recent_apps, mute, power, play_pause, next, previous</platform>
|
|
@@ -320,7 +338,7 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
320
338
|
</action>
|
|
321
339
|
|
|
322
340
|
<action name="siri">
|
|
323
|
-
iOS only. Sends a voice command to Siri
|
|
341
|
+
iOS only. Sends a voice command to the Siri service on iOS devices. Auto-approves consent dialogs, captures Siri's response text, then dismisses the Siri UI.
|
|
324
342
|
Use for triggering SiriKit intents and App Shortcuts registered by apps (media playback, messaging, banking shortcuts, etc.).
|
|
325
343
|
The captured response is stored in "siri_response" and returned in the step result. If Siri asks a follow-up question, reformulate the prompt with more detail and call siri again.
|
|
326
344
|
<field name="prompt" required="yes">Voice command text</field>
|
|
@@ -383,6 +401,15 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
|
|
|
383
401
|
<example>{"action": "assert_screen_changed", "threshold_percent": 15}</example>
|
|
384
402
|
<note>Pattern: observe(screenshot) then action then delay then assert_screen_changed. Do NOT observe after the action — it resets the baseline.</note>
|
|
385
403
|
</action>
|
|
404
|
+
|
|
405
|
+
<action name="ai_assert">
|
|
406
|
+
<field name="assert_prompt" required="yes"/>
|
|
407
|
+
<field name="include" type="[]string" note="opt-in extra context: screenshot, ocr (iOS). UI tree + the source script are always included."/>
|
|
408
|
+
<field name="timeout_ms" type="int" note="bounds the verdict (LLM/CLI reply), excluding context gathering. Default 60000."/>
|
|
409
|
+
<field name="message" note="prefixes the failure reason"/>
|
|
410
|
+
<example>{"action": "ai_assert", "assert_prompt": "the reply answers the user's question and is not an error", "include": ["screenshot"]}</example>
|
|
411
|
+
<note>Judges a natural-language assertion with the user's configured agent — either an LLM API provider (direct call) or Claude Code (spawned, reports back via report_assertion). Use for non-deterministic content (AI/LLM output, dynamic feeds) where exact-match assertions don't work. Treat as a soft assertion — it is non-deterministic.</note>
|
|
412
|
+
</action>
|
|
386
413
|
</assertions>
|
|
387
414
|
|
|
388
415
|
<metrics>
|
|
@@ -495,6 +522,7 @@ const TESTING_REF = `<testing-reference>
|
|
|
495
522
|
|
|
496
523
|
<actions>
|
|
497
524
|
app "com.example.app" — launch app
|
|
525
|
+
app "com.example.app" fresh — kill + launch for clean state
|
|
498
526
|
kill_app "com.example.app" — force-close app
|
|
499
527
|
tap "Text" — tap by text
|
|
500
528
|
tap "Field" near "Label" — tap near another element
|
|
@@ -526,6 +554,7 @@ const TESTING_REF = `<testing-reference>
|
|
|
526
554
|
drag "Item" to "Trash" — drag element
|
|
527
555
|
drag 100,200 to 300,400 duration:500 — coordinate drag
|
|
528
556
|
drag "App" to "Folder" press_duration:500 hold_duration:200 — press-hold-move-hold-release
|
|
557
|
+
drag_path 100,400 150,300:150 300,500:300 — multi-point path (X,Y:moveMs, first point's :ms = optional press-hold)
|
|
529
558
|
wait_for "Element" timeout:5000 — wait for element
|
|
530
559
|
wait_for type:button bounds:bottom_half timeout:3000 — modifier-only
|
|
531
560
|
delay 1000 — wait N ms
|
|
@@ -548,6 +577,8 @@ const TESTING_REF = `<testing-reference>
|
|
|
548
577
|
assert_exists "Element" — element is on screen
|
|
549
578
|
assert_not_exists "Element" — element is NOT on screen
|
|
550
579
|
assert_exists "Header" bounds:top_right — with region filter
|
|
580
|
+
assert_exists value:"hello" — assert a field's entered value (exact); sees typed content, not placeholder
|
|
581
|
+
assert_exists value_contains:"@mail" — assert a substring of the entered value
|
|
551
582
|
assert_count "Cell" expected:5 — element count
|
|
552
583
|
checkpoint "name" — mark checkpoint
|
|
553
584
|
</assertions>
|
|
@@ -620,9 +651,10 @@ const TESTING_REF = `<testing-reference>
|
|
|
620
651
|
When the user asks to create an API from a mobile app flow:
|
|
621
652
|
1. Observe the app and understand the flow
|
|
622
653
|
2. Write a .mob script with # Param: declarations for inputs and extract actions for outputs
|
|
623
|
-
3.
|
|
624
|
-
4.
|
|
625
|
-
5.
|
|
654
|
+
3. Use app "bundle.id" fresh to ensure a clean start — the app may be left on any screen from a previous call
|
|
655
|
+
4. Save it to {MOBAI_DATA_DIR}/apis/{name}.mob — flat (gmail-send.mob) or nested (gmail/send.mob)
|
|
656
|
+
5. Test it with test_run using project_dir: {MOBAI_DATA_DIR}/apis/ and case_path: {name}.mob
|
|
657
|
+
6. List available APIs: GET /api/v1/apis
|
|
626
658
|
Call an API: POST /api/v1/apis/run/{name} with {"device_id": "...", "params": {...}}
|
|
627
659
|
The {name} segment is the path inside apis/ minus the .mob extension.
|
|
628
660
|
API runs do not persist results to .mobai/runs/ — only the extracted values come back in the response.
|
package/package.json
CHANGED
package/server.json
CHANGED
|
@@ -6,12 +6,12 @@
|
|
|
6
6
|
"url": "https://github.com/MobAI-App/mobai-mcp",
|
|
7
7
|
"source": "github"
|
|
8
8
|
},
|
|
9
|
-
"version": "2.3.0",
|
|
9
|
+
"version": "2.3.1",
|
|
10
10
|
"packages": [
|
|
11
11
|
{
|
|
12
12
|
"registryType": "npm",
|
|
13
13
|
"identifier": "mobai-mcp",
|
|
14
|
-
"version": "2.3.0",
|
|
14
|
+
"version": "2.3.1",
|
|
15
15
|
"transport": {
|
|
16
16
|
"type": "stdio"
|
|
17
17
|
}
|