mobai-mcp 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -140,7 +140,7 @@ Pass this as the `commands` argument (a JSON string) to `execute_dsl` along with
140
140
 
141
141
  ## Troubleshooting
142
142
 
143
- **"Connection refused"** — Make sure the MobAI desktop app is running and the API is reachable at `http://127.0.0.1:8686`.
143
+ **"Connection refused" / "Could not reach the MobAI desktop app"** — Make sure the MobAI desktop app is installed and running, and the API is reachable at `http://127.0.0.1:8686`. If you don't have it yet, download and install it from [https://mobai.run/download](https://mobai.run/download).
144
144
 
145
145
  **"Bridge not running"** — Call `start_bridge` first. The iOS bridge can take up to a minute to come up.
146
146
 
package/dist/index.js CHANGED
@@ -15,6 +15,28 @@ import { RESOURCES, getResourceContent } from "./resources.js";
15
15
  const API_BASE_URL = "http://127.0.0.1:8686/api/v1";
16
16
  const DEFAULT_TIMEOUT_MS = 300000; // 5 minutes (matches Go httpClient timeout)
17
17
  const SCREENSHOT_DIR = path.join(os.tmpdir(), "mobai", "screenshots");
18
+ const DOWNLOAD_URL = "https://mobai.run/download";
19
+ // Message shown when the MobAI desktop app is not reachable at its local API.
20
+ const APP_NOT_RUNNING_MESSAGE = `Could not reach the MobAI desktop app at 127.0.0.1:8686. ` +
21
+ `Make sure the MobAI desktop app is installed and running, then try again. ` +
22
+ `If you don't have it yet, download and install it from ${DOWNLOAD_URL}.`;
23
+ /**
24
+ * Detects the "connection refused" / "could not connect" family of errors that
25
+ * Node's fetch throws when nothing is listening on the MobAI API port. These
26
+ * surface as a TypeError ("fetch failed") whose `cause` carries an errno code
27
+ * such as ECONNREFUSED / ENOTFOUND / ECONNRESET.
28
+ */
29
+ function isConnectionError(err) {
30
+ if (!(err instanceof Error))
31
+ return false;
32
+ const codes = ["ECONNREFUSED", "ENOTFOUND", "ECONNRESET", "EHOSTUNREACH", "ETIMEDOUT"];
33
+ const cause = err.cause;
34
+ const causeCode = cause && typeof cause === "object" ? cause.code : undefined;
35
+ if (typeof causeCode === "string" && codes.includes(causeCode))
36
+ return true;
37
+ // Fallback: undici reports a bare "fetch failed" TypeError for these.
38
+ return err.name === "TypeError" && /fetch failed/i.test(err.message);
39
+ }
18
40
  // ---------------------------------------------------------------------------
19
41
  // Screenshot helpers
20
42
  // ---------------------------------------------------------------------------
@@ -55,7 +77,16 @@ async function doRequest(method, urlPath, payload, timeoutMs = DEFAULT_TIMEOUT_M
55
77
  if (payload !== undefined && ["POST", "PUT", "PATCH"].includes(method)) {
56
78
  opts.body = typeof payload === "string" ? payload : JSON.stringify(payload);
57
79
  }
58
- const response = await fetch(url, opts);
80
+ let response;
81
+ try {
82
+ response = await fetch(url, opts);
83
+ }
84
+ catch (err) {
85
+ if (isConnectionError(err)) {
86
+ throw new Error(APP_NOT_RUNNING_MESSAGE);
87
+ }
88
+ throw err;
89
+ }
59
90
  clearTimeout(timeoutId);
60
91
  const text = await response.text();
61
92
  let body;
package/dist/resources.js CHANGED
@@ -123,7 +123,7 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
123
123
  <screenshot-tools>
124
124
  get_screenshot — fast low-quality image for LLM visual analysis.
125
125
  save_screenshot — full-quality PNG for reporting, debugging, or sharing.
126
- To verify animations and UI transitions, use record_start/record_stop.
126
+ A screenshot is a single settled frame — it cannot capture motion. Anything transient (animations, transitions, loading spinners; a screen transition is often only ~300ms) will be missed or caught mid-frame. To verify transitional behavior, use record_start/record_stop, which samples continuously and flags suspicious frames.
127
127
  </screenshot-tools>
128
128
 
129
129
  <infinite-scrolling>To collect data from infinite-scrolling views (feeds, search results), scroll to load a batch first, then observe with only_visible:false to get all loaded items in one go.</infinite-scrolling>
@@ -132,6 +132,7 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
132
132
  Element not visible — use scroll with to_element to find it.
133
133
  App launches and page transitions take time — use wait_for or delay.
134
134
  Observe before acting on unfamiliar screens.
135
+ NO_MATCH / failed assert_exists: if the element exists off-screen, the error lists it under "candidates" — scroll to bring it into view (off-screen elements cannot be tapped). Empty candidates means it is genuinely absent or not yet rendered.
135
136
  </troubleshooting>
136
137
  </guide>
137
138
 
@@ -145,6 +146,8 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
145
146
  <field name="text_contains" type="string">Substring, case-insensitive — preferred for most matching</field>
146
147
  <field name="text_starts_with" type="string">Prefix match</field>
147
148
  <field name="text_regex" type="string">Regex pattern — use for dynamic text (numbers, dates, counts)</field>
149
+ <field name="value" type="string">Exact match on the element's entered/current value (not its label/placeholder). Use to verify what was typed into a field — text matching sees the placeholder, value sees the content. Shown as content="..." in the UI tree. Secure fields are masked, so only length/non-empty is meaningful.</field>
150
+ <field name="value_contains" type="string">Substring match (case-insensitive) on the entered/current value</field>
148
151
  <field name="type" type="string">button, input, switch, text, image, cell, scrollview</field>
149
152
  <field name="accessibility_id" type="string">Exact match on the #id shown in UI tree (without the # prefix)</field>
150
153
  <field name="enabled" type="bool">Enabled state</field>
@@ -173,8 +176,11 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
173
176
 
174
177
  <action name="open_app">
175
178
  <field name="bundle_id" required="yes"/>
179
+ <field name="fresh" type="bool">Kill the app before launching to ensure a clean start from the home screen. Use when the app may have been left on an arbitrary screen from a previous run.</field>
180
+ <field name="debug" type="bool">ONLY for debug-built apps (e.g. Flutter dev builds, Xcode debug builds) that need a debugger attached to run. Attaches debugserver, streams stdout/stderr to a log file; result has log_path. Do NOT use for release/App Store apps — they launch fine with debug: false.</field>
176
181
  <example>{"action": "open_app", "bundle_id": "com.apple.Preferences"}</example>
177
- <note>If open_app fails or the app disappears immediately after launch, the app has likely crashed. Do NOT retry or try alternative launch methods — start crash investigation instead. Use metrics_start with capture_logs: true to capture device logs, then diagnose.</note>
182
+ <example>{"action": "open_app", "bundle_id": "com.apple.Preferences", "fresh": true}</example>
183
+ <note>If open_app fails or the app disappears immediately after launch, the app has likely crashed. Do NOT retry or try alternative launch methods — start crash investigation instead. Use debug: true (or metrics_start with capture_logs: true) to capture device logs, then diagnose.</note>
178
184
  </action>
179
185
 
180
186
  <action name="tap">
@@ -211,6 +217,12 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
211
217
  <example>{"action": "type", "text": "Hello", "predicate": {"type": "input"}, "clear_first": true}</example>
212
218
  </action>
213
219
 
220
+ <action name="clear">
221
+ Clear a field's text without typing. With a predicate, focuses that field first; without one, clears the currently focused field.
222
+ <field name="predicate">Optional target element</field>
223
+ <example>{"action": "clear", "predicate": {"type": "input"}}</example>
224
+ </action>
225
+
214
226
  <action name="swipe">
215
227
  Direction = finger movement. Use direction OR from_coords/to_coords.
216
228
  <field name="direction">up, down, left, right</field>
@@ -243,6 +255,12 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
243
255
  <example>{"action": "drag", "from": {"predicate": {"text": "App"}}, "to_element": {"predicate": {"text": "Folder"}}, "press_duration_ms": 500, "hold_duration_ms": 200}</example>
244
256
  </action>
245
257
 
258
+ <action name="drag_path">
259
+ <field name="points" type="array of {x, y, duration_ms}" required="true">Single-finger drag along a multi-point path. Each point's duration_ms is the time to move to it from the previous point. The first point is the touch-down location and its duration_ms is an optional initial press-hold (omit or 0 for none). Needs at least 2 points; every point after the first must have duration_ms > 0. Use this (not drag) for swipe-path gestures like unlock patterns, freeform draws, or curved scrolls.</field>
260
+ <example>{"action": "drag_path", "points": [{"x": 100, "y": 400}, {"x": 150, "y": 300, "duration_ms": 150}, {"x": 300, "y": 500, "duration_ms": 300}]}</example>
261
+ <example>{"action": "drag_path", "points": [{"x": 100, "y": 400, "duration_ms": 200}, {"x": 300, "y": 400, "duration_ms": 250}]}</example>
262
+ </action>
263
+
246
264
  <action name="press_key">
247
265
  <field name="key" required="yes"/>
248
266
  <platform name="android">enter, tab, delete, escape, volume_up, volume_down, home, back, recent_apps, mute, power, play_pause, next, previous</platform>
@@ -320,7 +338,7 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
320
338
  </action>
321
339
 
322
340
  <action name="siri">
323
- iOS only. Sends a voice command to Siri via XCUISiriService. Auto-approves consent dialogs, captures Siri's response text, then dismisses the Siri UI.
341
+ iOS only. Sends a voice command to the Siri service on iOS devices. Auto-approves consent dialogs, captures Siri's response text, then dismisses the Siri UI.
324
342
  Use for triggering SiriKit intents and App Shortcuts registered by apps (media playback, messaging, banking shortcuts, etc.).
325
343
  The captured response is stored in "siri_response" and returned in the step result. If Siri asks a follow-up question, reformulate the prompt with more detail and call siri again.
326
344
  <field name="prompt" required="yes">Voice command text</field>
@@ -383,6 +401,15 @@ const DEVICE_AUTOMATION_REF = `<device-automation-reference>
383
401
  <example>{"action": "assert_screen_changed", "threshold_percent": 15}</example>
384
402
  <note>Pattern: observe(screenshot) then action then delay then assert_screen_changed. Do NOT observe after the action — it resets the baseline.</note>
385
403
  </action>
404
+
405
+ <action name="ai_assert">
406
+ <field name="assert_prompt" required="yes"/>
407
+ <field name="include" type="[]string" note="opt-in extra context: screenshot, ocr (iOS). UI tree + the source script are always included."/>
408
+ <field name="timeout_ms" type="int" note="bounds the verdict (LLM/CLI reply), excluding context gathering. Default 60000."/>
409
+ <field name="message" note="prefixes the failure reason"/>
410
+ <example>{"action": "ai_assert", "assert_prompt": "the reply answers the user's question and is not an error", "include": ["screenshot"]}</example>
411
+ <note>Judges a natural-language assertion with the user's configured agent — either an LLM API provider (direct call) or Claude Code (spawned, reports back via report_assertion). Use for non-deterministic content (AI/LLM output, dynamic feeds) where exact-match assertions don't work. Treat as a soft assertion — it is non-deterministic.</note>
412
+ </action>
386
413
  </assertions>
387
414
 
388
415
  <metrics>
@@ -495,6 +522,7 @@ const TESTING_REF = `<testing-reference>
495
522
 
496
523
  <actions>
497
524
  app "com.example.app" — launch app
525
+ app "com.example.app" fresh — kill + launch for clean state
498
526
  kill_app "com.example.app" — force-close app
499
527
  tap "Text" — tap by text
500
528
  tap "Field" near "Label" — tap near another element
@@ -526,6 +554,7 @@ const TESTING_REF = `<testing-reference>
526
554
  drag "Item" to "Trash" — drag element
527
555
  drag 100,200 to 300,400 duration:500 — coordinate drag
528
556
  drag "App" to "Folder" press_duration:500 hold_duration:200 — press-hold-move-hold-release
557
+ drag_path 100,400 150,300:150 300,500:300 — multi-point path (X,Y:moveMs; the first point's :ms is an optional press-hold)
529
558
  wait_for "Element" timeout:5000 — wait for element
530
559
  wait_for type:button bounds:bottom_half timeout:3000 — modifier-only
531
560
  delay 1000 — wait N ms
@@ -548,6 +577,8 @@ const TESTING_REF = `<testing-reference>
548
577
  assert_exists "Element" — element is on screen
549
578
  assert_not_exists "Element" — element is NOT on screen
550
579
  assert_exists "Header" bounds:top_right — with region filter
580
+ assert_exists value:"hello" — assert a field's entered value (exact); sees typed content, not placeholder
581
+ assert_exists value_contains:"@mail" — assert a substring of the entered value
551
582
  assert_count "Cell" expected:5 — element count
552
583
  checkpoint "name" — mark checkpoint
553
584
  </assertions>
@@ -620,9 +651,10 @@ const TESTING_REF = `<testing-reference>
620
651
  When the user asks to create an API from a mobile app flow:
621
652
  1. Observe the app and understand the flow
622
653
  2. Write a .mob script with # Param: declarations for inputs and extract actions for outputs
623
- 3. Save it to {MOBAI_DATA_DIR}/apis/{name}.mobflat (gmail-send.mob) or nested (gmail/send.mob)
624
- 4. Test it with test_run using project_dir: {MOBAI_DATA_DIR}/apis/ and case_path: {name}.mob
625
- 5. List available APIs: GET /api/v1/apis
654
+ 3. Use app "bundle.id" fresh to ensure a clean start — the app may be left on any screen from a previous call
655
+ 4. Save it to {MOBAI_DATA_DIR}/apis/{name}.mob — flat (gmail-send.mob) or nested (gmail/send.mob)
656
+ 5. Test it with test_run using project_dir: {MOBAI_DATA_DIR}/apis/ and case_path: {name}.mob
657
+ 6. List available APIs: GET /api/v1/apis
626
658
  Call an API: POST /api/v1/apis/run/{name} with {"device_id": "...", "params": {...}}
627
659
  The {name} segment is the path inside apis/ minus the .mob extension.
628
660
  API runs do not persist results to .mobai/runs/ — only the extracted values come back in the response.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobai-mcp",
3
- "version": "2.3.0",
3
+ "version": "2.3.1",
4
4
  "mcpName": "io.github.MobAI-App/mobai-mcp",
5
5
  "description": "MCP server for MobAI - AI-powered mobile device automation",
6
6
  "type": "module",
package/server.json CHANGED
@@ -6,12 +6,12 @@
6
6
  "url": "https://github.com/MobAI-App/mobai-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "2.3.0",
9
+ "version": "2.3.1",
10
10
  "packages": [
11
11
  {
12
12
  "registryType": "npm",
13
13
  "identifier": "mobai-mcp",
14
- "version": "2.3.0",
14
+ "version": "2.3.1",
15
15
  "transport": {
16
16
  "type": "stdio"
17
17
  }