@nbakka/mcp-appium 2.0.5 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/server.js +52 -63
  2. package/package.json +1 -1
package/lib/server.js CHANGED
@@ -146,7 +146,6 @@ const createMcpServer = () => {
146
146
  tool("mobile_list_elements_on_screen", "List elements on screen and their coordinates, with display text or accessibility label. Do not cache this result.", {}, async ({}) => {
147
147
  requireRobot();
148
148
  const elements = await robot.getElementsOnScreen();
149
- console.log(elements)
150
149
  const result = elements.map(element => {
151
150
  const out = {
152
151
  type: element.type,
@@ -215,83 +214,73 @@ await new Promise(resolve => setTimeout(resolve, 5000));
215
214
  return `Current device orientation is ${orientation}`;
216
215
  });
217
216
  tool(
218
- "mobile_create_session",
219
- "create a mobile session once so that session id can be used in other tools where it is needed",
220
- {},
221
- async () => {
222
- const capabilities = {
223
- platformName: "Android",
224
- "appium:udid": "emulator-5554",
225
- "appium:automationName": "UiAutomator2",
226
- "appium:noReset": true,
227
- "appium:appPackage": "com.locon.housing",
228
- "appium:appActivity": "com.locon.housing.presentation.MainActivity",
229
- };
230
-
231
- const payload = {
232
- capabilities: {
233
- firstMatch: [{}],
234
- alwaysMatch: capabilities,
235
- },
236
- };
217
+ "tap_by_text",
218
+ "tap an element by passing the text visible on screen",
219
+ { text: "string" },
220
+ async ({ text }) => {
221
+ const { execSync } = require("child_process");
222
+ const { XMLParser } = require("fast-xml-parser");
237
223
 
238
- const response = await fetch("http://localhost:4723/session", {
239
- method: "POST",
240
- headers: { "Content-Type": "application/json" },
241
- body: JSON.stringify(payload),
242
- });
224
+ if (!text) throw new Error("Input text is required");
243
225
 
244
- if (!response.ok) {
245
- throw new Error(`Failed to create session: ${response.statusText}`);
226
+ // 1. Dump UI Automator XML to stdout
227
+ let dump;
228
+ try {
229
+ dump = execSync("adb shell uiautomator dump /dev/tty", { maxBuffer: 10 * 1024 * 1024 }).toString();
230
+ } catch (e) {
231
+ throw new Error("Failed to dump UI Automator XML via adb");
246
232
  }
247
233
 
248
- const json = await response.json();
249
- return `Session created with sessionId: ${json.sessionId}`;
250
- }
251
- );
234
+ // 2. Parse XML
235
+ const parser = new XMLParser({
236
+ ignoreAttributes: false,
237
+ attributeNamePrefix: "",
238
+ });
239
+ const xmlObj = parser.parse(dump);
252
240
 
241
+ // 3. Recursive traversal to find matching element by text/content-desc/hint
242
+ function findElement(node) {
243
+ if (!node) return null;
253
244
 
254
- tool(
255
- "mobile_click",
256
- "Click an element identified by text using path",
257
- {
258
- sessionId: zod_1.z.string().describe("Appium session ID"),
259
- text: zod_1.z.string().describe("Visible text of the element to click"),
260
- },
261
- async ({ sessionId, text }) => {
262
- const xpath = `//*[@text="${text}"]`;
263
- const clickUrl = `http://localhost:4723/session/${sessionId}/element`;
245
+ const matchText = node.text || node["content-desc"] || node.hint || "";
246
+ if (matchText === text) return node;
264
247
 
265
- // Find element
266
- const findResponse = await fetch(clickUrl, {
267
- method: "POST",
268
- headers: { "Content-Type": "application/json" },
269
- body: JSON.stringify({ using: "xpath", value: xpath }),
270
- });
248
+ if (node.node) {
249
+ if (Array.isArray(node.node)) {
250
+ for (const child of node.node) {
251
+ const found = findElement(child);
252
+ if (found) return found;
253
+ }
254
+ } else {
255
+ return findElement(node.node);
256
+ }
257
+ }
271
258
 
272
- if (!findResponse.ok) {
273
- throw new Error(`Failed to find element: ${findResponse.statusText}`);
259
+ return null;
274
260
  }
275
261
 
276
- const findJson = await findResponse.json();
277
- if (!findJson.value || !findJson.value.elementId) {
278
- throw new Error(`Element with text "${text}" not found`);
279
- }
262
+ const element = findElement(xmlObj.hierarchy.node);
263
+ if (!element) throw new Error(`Element with text "${text}" not found`);
280
264
 
281
- const elementId = findJson.value.elementId;
265
+ // 4. Parse bounds: format "[left,top][right,bottom]"
266
+ const bounds = element.bounds;
267
+ if (!bounds) throw new Error("Element bounds not found");
282
268
 
283
- // Click element
284
- const clickElementUrl = `http://localhost:4723/session/${sessionId}/element/${elementId}/click`;
285
- const clickResponse = await fetch(clickElementUrl, { method: "POST" });
269
+ const coords = bounds.match(/\d+/g).map(Number);
270
+ if (coords.length !== 4) throw new Error("Invalid bounds format");
286
271
 
287
- if (!clickResponse.ok) {
288
- throw new Error(`Failed to click element: ${clickResponse.statusText}`);
289
- }
272
+ const [left, top, right, bottom] = coords;
273
+ const x = Math.floor((left + right) / 2);
274
+ const y = Math.floor((top + bottom) / 2);
290
275
 
291
- // Optional wait after click
292
- await new Promise((resolve) => setTimeout(resolve, 2000));
276
+ // 5. Tap via adb
277
+ try {
278
+ execSync(`adb shell input tap ${x} ${y}`);
279
+ } catch (e) {
280
+ throw new Error("Failed to perform tap via adb");
281
+ }
293
282
 
294
- return `Clicked on element with text: "${text}" in session: ${sessionId}`;
283
+ return `Tapped element with text "${text}" at (${x},${y})`;
295
284
  }
296
285
  );
297
286
  // async check for latest agent version
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nbakka/mcp-appium",
3
- "version": "2.0.5",
3
+ "version": "2.0.8",
4
4
  "description": "Appium MCP",
5
5
  "engines": {
6
6
  "node": ">=18"